Completed
Push — master ( d2492e...5642a2 )
by Lars
12s
created

UTF8::replace_diamond_question_mark()   B

Complexity

Conditions 6
Paths 10

Size

Total Lines 40
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 6.031

Importance

Changes 0
Metric Value
dl 0
loc 40
ccs 19
cts 21
cp 0.9048
rs 8.439
c 0
b 0
f 0
cc 6
eloc 23
nc 10
nop 3
crap 6.031
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
   * Bom => Byte-Length
   *
   * Keys are byte sequences that mark the start of a string, values are the number of bytes
   * of the key. The "WINDOWS-1252" variants are the BOM bytes after every single byte was
   * read as a WINDOWS-1252 character and stored again as UTF-8.
   *
   * All keys are written as hex escapes, so they do not depend on the encoding of this
   * source file and cannot be damaged by editors or copy & paste.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Creating an instance makes sure the environment detection has been executed.
   */
  public function __construct()
  {
    // checkForSupport() only does real work on its first call
    self::checkForSupport();
  }
212
213
  /**
   * Multi-byte aware replacement for the $str[1] syntax: fetch one character by position.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string if the input is empty
   *                or the position is negative.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string and negative positions are not supported
    if ($str === '' || $pos < 0) {
      return '';
    }

    return (string)self::substr($str, $pos, 1);
  }
233
234
  /**
   * Makes sure the string starts with a BOM by prepending the UTF-8 BOM when none is found.
   *
   * INFO: A string that already has a BOM is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    // already marked -> keep the input as it is
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Builds a new array in which every key was lower- or upper-cased (multi-byte aware).
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); every other value is
   *                     handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array <p>An array with its keys lower or uppercased. If two keys end up equal,
   *               the value of the later one wins.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // only CASE_UPPER switches the direction, unknown flags fall back to lower-casing
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $name => $item) {
      $newName = $toUpper ? self::strtoupper($name) : self::strtolower($name);
      $result[$newName] = $item;
    }

    return $result;
  }
284
285
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are read as one big-endian number: leading zero bits are dropped and the
   * remaining bits are left-padded to whole bytes. Every group of 8 bits is converted on its
   * own, so the input may be of any length (no float precision limit) and a bit count that
   * is not a multiple of 8 cannot shift the output by half a byte.
   *
   * @param mixed $bin <p>String of "1" and "0"; other characters are ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for non-string input, empty input
   *                or a value of zero.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // keep only real bits and drop leading zeros (a value of zero results in '')
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8, so that each chunk below is exactly one byte
    $missing = (8 - (\strlen($bits) % 8)) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
305
306
  /**
   * Provides the three bytes of the UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    // EF BB BF
    return "\xef\xbb\xbf";
  }
317
318
  /**
   * Alias: forwards both arguments unchanged to UTF8::chr_map().
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
332
333
  /**
   * Detects once which UTF-8 related extensions / features are available and stores the
   * result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection result never changes, so every call after the first one is a no-op
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    // set the marker before the checks are executed
    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator ids are only fetched if "intl" really offers the function
    $canListIds = self::$SUPPORT['intl'] === true
                  &&
                  \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListIds ? transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
369
370
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Numeric strings (e.g. "9731") are converted into an integer first, so that every code
   * path - the lookup table, IntlChar and the manual fallback - handles them the same way.
   * Results are cached per code point + encoding for the lifetime of the request.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input
   *                     (non-numeric input or a value outside of 0 - 0x10FFFF).</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    // IntlChar::chr() reads a string as "one UTF-8 character", not as a number,
    // therefore numeric strings must be converted before they are used
    if (\is_int($code_point) === false) {
      if (\is_numeric($code_point) === false) {
        return null;
      }

      $code_point = (int)$code_point;
    }

    // there is nothing outside of U+0000 - U+10FFFF, the table lookups below would fail
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $chr = \IntlChar::chr($code_point);

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    // fallback: build the multi-byte sequence by hand from the single-byte table

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    // code points up to 0x7F were already returned above, so 2 - 4 bytes are left
    if ($code_point <= 0x7FF) {
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
461
462
  /**
   * Splits the string via UTF8::split() and runs the callback on every resulting element.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
476
477
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, empty for empty input.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // self::$SUPPORT is filled lazily, make sure it exists before it is read below
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // with function overloading the native strlen() does not count bytes anymore
      return \array_map(
          static function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
513
514
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes belong to the character (1 - 4); the payload bits
   * of the following bytes are appended, 6 bits per byte.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $value = self::ord($char[0]);

    // 0xxxxxxx -> the byte is the code point
    if (($value & 0x80) === 0) {
      return $value;
    }

    // a first byte that matches none of the patterns below is returned as it is
    $length = 1;

    if (($value & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value &= ~0xc0;
    } elseif (($value & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value &= ~0xe0;
    } elseif (($value & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value &= ~0xf0;
    }

    $index = 1;
    while ($index < $length) {
      // 10xxxxxx
      $value = ($value << 6) + (self::ord($char[$index]) & ~0x80);
      ++$index;
    }

    return $value;
  }
552
553
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Handed over to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty
   *                input.</p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the HTML entity of the NUL byte is handled like an empty character
    $subject = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($subject), $pfix);
  }
573
574
  /**
   * Alias: forwards the character unchanged to UTF8::chr_to_decimal().
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
587
588
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * INFO: the line ending is placed between the chunks only, nothing is appended after
   *       the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
601
602
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The optional steps run in a fixed order: diamond question mark, invisible characters,
   * whitespace, MS Word characters and finally the BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
   *                                              invisible characters e.g.: "\0"</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 holds well-formed sequences, the other groups catch single invalid bytes;
    // only group 1 is written back, so the invalid bytes are dropped
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
662
663
  /**
   * Repairs simple encoding errors via UTF8::fix_simple_utf8() and then runs UTF8::clean()
   * with a fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, an empty string for empty input.</p>
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($str);

    return self::clean(
        $repaired,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // ... but do not touch MS Word chars
        true,  // keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
696
697
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a string is split first, an array is used as it is
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $points = \array_map(
        static function ($char) {
          return self::ord($char);
        },
        $chars
    );

    if ($u_style === false) {
      return $points;
    }

    return \array_map(
        static function ($point) {
          return self::int_to_hex($point);
        },
        $points
    );
  }
734
735
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // one array element per character
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
748
749
  /**
   * Converts an int-value into a UTF-8 character by decoding the matching numeric
   * HTML entity ("&#...;").
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
760
761
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise the string is only converted if the detected encoding differs
   *                         from the wanted one</p>
   *
   * @return string <p>The converted string; the input itself if one of the arguments is empty, if there is
   *                nothing to convert or if the conversion result is empty.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // without "force" a string that already has the wanted encoding is left alone
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    // the own converters are used if forced or if the source is UTF-8 / latin
    $useOwnConverter = $force === true
                       ||
                       \in_array($detected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($useOwnConverter && $encoding === 'UTF-8') {
      return self::to_utf8($str);
    }

    if ($useOwnConverter && $encoding === 'ISO-8859-1') {
      return self::to_iso8859($str);
    }

    if (
        \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);
    if ($converted) {
      return $converted;
    }

    return $str;
  }
847
848
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>Name of the file to read; it is passed through
   *                                        filter_var(..., FILTER_SANITIZE_STRING) first.</p>
   * @param bool          $use_include_path [optional] <p>Handed over to the native
   *                                        file_get_contents().</p>
   * @param resource|null $context          [optional] <p>A valid context resource created with
   *                                        stream_context_create. If it is null and a timeout is
   *                                        given, a context with that "http" timeout is created.</p>
   * @param int|null      $offset           [optional] <p>The offset where the reading starts,
   *                                        null is handled like 0.</p>
   * @param int|null      $maxLength        [optional] <p>Maximum length of data read. The default is
   *                                        to read until end of file is reached.</p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // a given context always wins, the timeout is only used for the default context
    if ($context === null && $timeout) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // the length is only passed on if the caller has set one
    $data = \is_int($maxLength) === true
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    $isText = self::is_binary($data, true) !== true;
    if (
        $isText
        ||
        self::is_utf16($data) !== false
        ||
        self::is_utf32($data) !== false
    ) {
      $data = self::cleanup(self::encode('UTF-8', $data, false));
    }

    return $data;
  }
931
932
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The file is read via the native file_get_contents(). If it cannot be read, PHP emits its
   * usual warning and this method answers with false instead of passing the failure value on
   * to UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also if the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // "false" means that the read failed, there is no string that could be checked
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
943
944
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively, element by element. Values
   * that are neither an array, an object nor a string are returned unchanged.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form handed to the "Normalizer"-class.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    switch (\gettype($var)) {
      case 'array':
        foreach ($var as $key => $value) {
          /** @noinspection AlterInForeachInspection */
          $var[$key] = self::filter($value, $normalization_form, $leading_combining);
        }
        break;
      case 'object':
        foreach ($var as $property => $value) {
          $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
        }
        break;
      case 'string':
        if (\strpos($var, "\r") !== false) {
          // Workaround https://bugs.php.net/65732
          $var = \str_replace(["\r\n", "\r"], "\n", $var);
        }

        // Nothing more to do for ASCII-only strings.
        if (self::is_ascii($var) !== false) {
          break;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
          // Placeholder, so that the "isset()" check below succeeds.
          $normalized = '-';
        } else {
          /** @noinspection PhpUndefinedClassInspection */
          $normalized = \Normalizer::normalize($var, $normalization_form);

          // Fall back to a re-encoding if the normalization gave no usable result.
          $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
        }

        $needsLeadingPrefix = $var[0] >= "\x80"
                              &&
                              isset($normalized[0], $leading_combining[0])
                              &&
                              \preg_match('/^\p{Mn}/u', $var);

        if ($needsLeadingPrefix) {
          // Prevent leading combining chars
          // for NFC-safe concatenations.
          $var = $leading_combining . $var;
        }

        break;
    }

    return $var;
  }
1007
1008
  /**
1009
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1010
   *
1011
   * Gets a specific external variable by name and optionally filters it
1012
   *
1013
   * @link  http://php.net/manual/en/function.filter-input.php
1014
   *
1015
   * @param int    $type          <p>
1016
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1017
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1018
   *                              <b>INPUT_ENV</b>.
1019
   *                              </p>
1020
   * @param string $variable_name <p>
1021
   *                              Name of a variable to get.
1022
   *                              </p>
1023
   * @param int    $filter        [optional] <p>
1024
   *                              The ID of the filter to apply. The
1025
   *                              manual page lists the available filters.
1026
   *                              </p>
1027
   * @param mixed  $options       [optional] <p>
1028
   *                              Associative array of options or bitwise disjunction of flags. If filter
1029
   *                              accepts options, flags can be provided in "flags" field of array.
1030
   *                              </p>
1031
   *
1032
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1033
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1034
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1035
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1036
   * @since 5.2.0
1037
   */
1038 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded when the caller really passed it,
    // otherwise the native function is called without that argument.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1048
1049
  /**
1050
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1051
   *
1052
   * Gets external variables and optionally filters them
1053
   *
1054
   * @link  http://php.net/manual/en/function.filter-input-array.php
1055
   *
1056
   * @param int   $type       <p>
1057
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1058
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1059
   *                          <b>INPUT_ENV</b>.
1060
   *                          </p>
1061
   * @param mixed $definition [optional] <p>
1062
   *                          An array defining the arguments. A valid key is a string
1063
   *                          containing a variable name and a valid value is either a filter type, or an array
1064
   *                          optionally specifying the filter, flags and options. If the value is an
1065
   *                          array, valid keys are filter which specifies the
1066
   *                          filter type,
1067
   *                          flags which specifies any flags that apply to the
1068
   *                          filter, and options which specifies any options that
1069
   *                          apply to the filter. See the example below for a better understanding.
1070
   *                          </p>
1071
   *                          <p>
1072
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1073
   *                          input array are filtered by this filter.
1074
   *                          </p>
1075
   * @param bool  $add_empty  [optional] <p>
1076
   *                          Add missing keys as <b>NULL</b> to the return value.
1077
   *                          </p>
1078
   *
1079
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1080
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1081
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1082
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1083
   * fails.
1084
   * @since 5.2.0
1085
   */
1086 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only "$type" given, the native function is called without the optional arguments.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1096
1097
  /**
1098
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1099
   *
1100
   * Filters a variable with a specified filter
1101
   *
1102
   * @link  http://php.net/manual/en/function.filter-var.php
1103
   *
1104
   * @param mixed $variable <p>
1105
   *                        Value to filter.
1106
   *                        </p>
1107
   * @param int   $filter   [optional] <p>
1108
   *                        The ID of the filter to apply. The
1109
   *                        manual page lists the available filters.
1110
   *                        </p>
1111
   * @param mixed $options  [optional] <p>
1112
   *                        Associative array of options or bitwise disjunction of flags. If filter
1113
   *                        accepts options, flags can be provided in "flags" field of array. For
1114
   *                        the "callback" filter, callable type should be passed. The
1115
   *                        callback must accept one argument, the value to be filtered, and return
1116
   *                        the value after filtering/sanitizing it.
1117
   *                        </p>
1118
   *                        <p>
1119
   *                        <code>
1120
   *                        // for filters that accept options, use this format
1121
   *                        $options = array(
1122
   *                        'options' => array(
1123
   *                        'default' => 3, // value to return if the filter fails
1124
   *                        // other options here
1125
   *                        'min_range' => 0
1126
   *                        ),
1127
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1128
   *                        );
1129
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1130
   *                        // for filter that only accept flags, you can pass them directly
1131
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1132
   *                        // for filter that only accept flags, you can also pass as an array
1133
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1134
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1135
   *                        // callback validate filter
1136
   *                        function foo($value)
1137
   *                        {
1138
   *                        // Expected format: Surname, GivenNames
1139
   *                        if (strpos($value, ", ") === false) return false;
1140
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1141
   *                        $empty = (empty($surname) || empty($givennames));
1142
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1143
   *                        if ($empty || $notstrings) {
1144
   *                        return false;
1145
   *                        } else {
1146
   *                        return $value;
1147
   *                        }
1148
   *                        }
1149
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1150
   *                        </code>
1151
   *                        </p>
1152
   *
1153
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1154
   * @since 5.2.0
1155
   */
1156 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded when the caller really passed it.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1166
1167
  /**
1168
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1169
   *
1170
   * Gets multiple variables and optionally filters them
1171
   *
1172
   * @link  http://php.net/manual/en/function.filter-var-array.php
1173
   *
1174
   * @param array $data       <p>
1175
   *                          An array with string keys containing the data to filter.
1176
   *                          </p>
1177
   * @param mixed $definition [optional] <p>
1178
   *                          An array defining the arguments. A valid key is a string
1179
   *                          containing a variable name and a valid value is either a
1180
   *                          filter type, or an
1181
   *                          array optionally specifying the filter, flags and options.
1182
   *                          If the value is an array, valid keys are filter
1183
   *                          which specifies the filter type,
1184
   *                          flags which specifies any flags that apply to the
1185
   *                          filter, and options which specifies any options that
1186
   *                          apply to the filter. See the example below for a better understanding.
1187
   *                          </p>
1188
   *                          <p>
1189
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1190
   *                          input array are filtered by this filter.
1191
   *                          </p>
1192
   * @param bool  $add_empty  [optional] <p>
1193
   *                          Add missing keys as <b>NULL</b> to the return value.
1194
   *                          </p>
1195
   *
1196
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1197
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1198
   * the variable is not set.
1199
   * @since 5.2.0
1200
   */
1201 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only "$data" given, the native function is called without the optional arguments.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1211
1212
  /**
   * Check that a string is not longer than the given number of characters.
   *
   * The length is taken from "UTF8::strlen()".
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The maximum number of chars the string may have.
   *
   * @return bool true if the length of the string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1224
1225
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacement table comes from the "utf8_fix" data file and is split
   * into search- and replace-lists only once per request.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      // Load the table lazily, it may already be there from an earlier call.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1259
1260
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element, the keys are kept.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // Decode and re-encode until the string does not change anymore.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1288
1289
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" is marked as supported, its result is used whenever it maps
   * to one of the known direction codes; otherwise the code point of the
   * character is checked against a built-in list of right-to-left code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      // direction codes from "IntlChar"-Class
      $direction = \IntlChar::charDirection($char);

      if (\in_array($direction, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($direction, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = self::chr_to_decimal($char);

    // Everything outside of this span is handled as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      $rtlSingles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];

      $rtlRanges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlSingles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];

      $rtlRanges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // Between the two tables only 0x200f counts as right-to-left.
      return 'LTR';

    }

    // Single code points are compared strictly, like a "===" check.
    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1410
1411
  /**
   * Get data from "/data/*.php".
   *
   * @param string $file <p>Name of the data file, without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file.
   *                               Will return false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1428
1429
  /**
   * Check for php-support.
   *
   * The support check runs first if it has not been done yet.
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return the stored value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown keys fall back to null.
    return self::$SUPPORT[$key] ?? null;
  }
1454
1455
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool <p>The result of "UTF8::string_has_bom()".</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1470
1471
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted via "hexdec()" and then handed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $decimal = \hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
1482
1483
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted are 4 to 6 hexadecimal digits (case-insensitive), optionally
   * prefixed with "\u" or "U+", e.g. "U+00f1", "\u00f1" or "00f1".
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hexadecimal digits are allowed: with "[a-z0-9]" an input like
    // "wxyz" matched and was silently converted to 0 instead of failing.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1504
1505
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string <p>The result of "UTF8::html_entity_decode()".</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1520
1521
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * "mb_encode_numericentity()" is used if it is available, otherwise every
   * character is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // Starting the range at 0x80 leaves the ASCII chars untouched.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap consists of groups of four values (start, end, offset, mask).
      // The former fifth element did not belong to any group and is rejected
      // by newer PHP versions, so it was removed.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode char by char.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1567
1568
  /**
1569
   * UTF-8 version of html_entity_decode()
1570
   *
1571
   * The reason we are not using html_entity_decode() by itself is because
1572
   * while it is not technically correct to leave out the semicolon
1573
   * at the end of an entity most browsers will still interpret the entity
1574
   * correctly. html_entity_decode() does not convert entities without
1575
   * semicolons, so we are left with our own little solution here. Bummer.
1576
   *
1577
   * Convert all HTML entities to their applicable characters
1578
   *
1579
   * INFO: opposite to UTF8::html_encode()
1580
   *
1581
   * @link http://php.net/manual/en/function.html-entity-decode.php
1582
   *
1583
   * @param string $str      <p>
1584
   *                         The input string.
1585
   *                         </p>
1586
   * @param int    $flags    [optional] <p>
1587
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1588
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1589
   *                         <table>
1590
   *                         Available <i>flags</i> constants
1591
   *                         <tr valign="top">
1592
   *                         <td>Constant Name</td>
1593
   *                         <td>Description</td>
1594
   *                         </tr>
1595
   *                         <tr valign="top">
1596
   *                         <td><b>ENT_COMPAT</b></td>
1597
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1598
   *                         </tr>
1599
   *                         <tr valign="top">
1600
   *                         <td><b>ENT_QUOTES</b></td>
1601
   *                         <td>Will convert both double and single quotes.</td>
1602
   *                         </tr>
1603
   *                         <tr valign="top">
1604
   *                         <td><b>ENT_NOQUOTES</b></td>
1605
   *                         <td>Will leave both double and single quotes unconverted.</td>
1606
   *                         </tr>
1607
   *                         <tr valign="top">
1608
   *                         <td><b>ENT_HTML401</b></td>
1609
   *                         <td>
1610
   *                         Handle code as HTML 4.01.
1611
   *                         </td>
1612
   *                         </tr>
1613
   *                         <tr valign="top">
1614
   *                         <td><b>ENT_XML1</b></td>
1615
   *                         <td>
1616
   *                         Handle code as XML 1.
1617
   *                         </td>
1618
   *                         </tr>
1619
   *                         <tr valign="top">
1620
   *                         <td><b>ENT_XHTML</b></td>
1621
   *                         <td>
1622
   *                         Handle code as XHTML.
1623
   *                         </td>
1624
   *                         </tr>
1625
   *                         <tr valign="top">
1626
   *                         <td><b>ENT_HTML5</b></td>
1627
   *                         <td>
1628
   *                         Handle code as HTML 5.
1629
   *                         </td>
1630
   *                         </tr>
1631
   *                         </table>
1632
   *                         </p>
1633
   * @param string $encoding [optional] <p>Encoding to use.</p>
1634
   *
1635
   * @return string <p>The decoded string.</p>
1636
   */
1637 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // An empty input has nothing to decode.
    if ($str === '') {
      return '';
    }

    // Inputs shorter than four bytes are handed back untouched (examples: "&;" or "&x;").
    if (\strlen($str) < 4) {
      return $str;
    }

    // No ampersand at all, or neither a "&#" prefix nor a semicolon: return the input as is.
    if (\strpos($str, '&') === false) {
      return $str;
    }
    if (\strpos($str, '&#') === false && \strpos($str, ';') === false) {
      return $str;
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // Only encodings other than the two literal fast-path names are normalized.
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Warn when the (normalized) encoding is not one of the three listed names
    // and the mbstring support flag is off.
    $isCommonEncoding = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if (!$isCommonEncoding && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Converts one decimal entity ("&#NN;") into the target encoding; entities that
    // decode to a single or double quote are returned unchanged, so they reach the
    // native html_entity_decode() call below still encoded.
    $decodeDecimalEntity = static function ($match) use ($encoding) {
      $decoded = \mb_convert_encoding($match[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $match[0];
      }

      return $decoded;
    };

    // Repeat until a full pass no longer changes the string (handles nested encodings).
    do {
      $previous = $str;

      $str = (string)\preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // Append the missing ";" to numeric (decimal / hex) entities that lack one,
      // then let the native function decode everything according to $flags.
      $terminated = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = \html_entity_decode($terminated, $flags, $encoding);
    } while ($previous !== $str);

    return $str;
  }
1707
1708
  /**
1709
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1710
   *
1711
   * @link http://php.net/manual/en/function.htmlentities.php
1712
   *
1713
   * @param string $str           <p>
1714
   *                              The input string.
1715
   *                              </p>
1716
   * @param int    $flags         [optional] <p>
1717
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1718
   *                              invalid code unit sequences and the used document type. The default is
1719
   *                              ENT_COMPAT | ENT_HTML401.
1720
   *                              <table>
1721
   *                              Available <i>flags</i> constants
1722
   *                              <tr valign="top">
1723
   *                              <td>Constant Name</td>
1724
   *                              <td>Description</td>
1725
   *                              </tr>
1726
   *                              <tr valign="top">
1727
   *                              <td><b>ENT_COMPAT</b></td>
1728
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1729
   *                              </tr>
1730
   *                              <tr valign="top">
1731
   *                              <td><b>ENT_QUOTES</b></td>
1732
   *                              <td>Will convert both double and single quotes.</td>
1733
   *                              </tr>
1734
   *                              <tr valign="top">
1735
   *                              <td><b>ENT_NOQUOTES</b></td>
1736
   *                              <td>Will leave both double and single quotes unconverted.</td>
1737
   *                              </tr>
1738
   *                              <tr valign="top">
1739
   *                              <td><b>ENT_IGNORE</b></td>
1740
   *                              <td>
1741
   *                              Silently discard invalid code unit sequences instead of returning
1742
   *                              an empty string. Using this flag is discouraged as it
1743
   *                              may have security implications.
1744
   *                              </td>
1745
   *                              </tr>
1746
   *                              <tr valign="top">
1747
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1748
   *                              <td>
1749
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1750
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1751
   *                              </td>
1752
   *                              </tr>
1753
   *                              <tr valign="top">
1754
   *                              <td><b>ENT_DISALLOWED</b></td>
1755
   *                              <td>
1756
   *                              Replace invalid code points for the given document type with a
1757
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1758
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1759
   *                              instance, to ensure the well-formedness of XML documents with
1760
   *                              embedded external content.
1761
   *                              </td>
1762
   *                              </tr>
1763
   *                              <tr valign="top">
1764
   *                              <td><b>ENT_HTML401</b></td>
1765
   *                              <td>
1766
   *                              Handle code as HTML 4.01.
1767
   *                              </td>
1768
   *                              </tr>
1769
   *                              <tr valign="top">
1770
   *                              <td><b>ENT_XML1</b></td>
1771
   *                              <td>
1772
   *                              Handle code as XML 1.
1773
   *                              </td>
1774
   *                              </tr>
1775
   *                              <tr valign="top">
1776
   *                              <td><b>ENT_XHTML</b></td>
1777
   *                              <td>
1778
   *                              Handle code as XHTML.
1779
   *                              </td>
1780
   *                              </tr>
1781
   *                              <tr valign="top">
1782
   *                              <td><b>ENT_HTML5</b></td>
1783
   *                              <td>
1784
   *                              Handle code as HTML 5.
1785
   *                              </td>
1786
   *                              </tr>
1787
   *                              </table>
1788
   *                              </p>
1789
   * @param string $encoding      [optional] <p>
1790
   *                              Like <b>htmlspecialchars</b>,
1791
   *                              <b>htmlentities</b> takes an optional third argument
1792
   *                              <i>encoding</i> which defines encoding used in
1793
   *                              conversion.
1794
   *                              Although this argument is technically optional, you are highly
1795
   *                              encouraged to specify the correct value for your code.
1796
   *                              </p>
1797
   * @param bool   $double_encode [optional] <p>
1798
   *                              When <i>double_encode</i> is turned off PHP will not
1799
   *                              encode existing html entities. The default is to convert everything.
1800
   *                              </p>
1801
   *
1802
   *
1803
   * @return string the encoded string.
1804
   * </p>
1805
   * <p>
1806
   * If the input <i>string</i> contains an invalid code unit
1807
   * sequence within the given <i>encoding</i> an empty string
1808
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1809
   * <b>ENT_SUBSTITUTE</b> flags are set.
1810
   */
1811 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given; any other name is normalized first.
    $isFastPathEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isFastPathEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    // The native htmlentities() leaves backslashes untouched, so they are
    // replaced by their numeric entity here.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace('\\', '&#92;', $encoded);

    return self::html_encode($encoded, true, $encoding);
  }
1831
1832
  /**
1833
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1834
   *
1835
   * INFO: Take a look at "UTF8::htmlentities()"
1836
   *
1837
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1838
   *
1839
   * @param string $str           <p>
1840
   *                              The string being converted.
1841
   *                              </p>
1842
   * @param int    $flags         [optional] <p>
1843
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1844
   *                              invalid code unit sequences and the used document type. The default is
1845
   *                              ENT_COMPAT | ENT_HTML401.
1846
   *                              <table>
1847
   *                              Available <i>flags</i> constants
1848
   *                              <tr valign="top">
1849
   *                              <td>Constant Name</td>
1850
   *                              <td>Description</td>
1851
   *                              </tr>
1852
   *                              <tr valign="top">
1853
   *                              <td><b>ENT_COMPAT</b></td>
1854
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1855
   *                              </tr>
1856
   *                              <tr valign="top">
1857
   *                              <td><b>ENT_QUOTES</b></td>
1858
   *                              <td>Will convert both double and single quotes.</td>
1859
   *                              </tr>
1860
   *                              <tr valign="top">
1861
   *                              <td><b>ENT_NOQUOTES</b></td>
1862
   *                              <td>Will leave both double and single quotes unconverted.</td>
1863
   *                              </tr>
1864
   *                              <tr valign="top">
1865
   *                              <td><b>ENT_IGNORE</b></td>
1866
   *                              <td>
1867
   *                              Silently discard invalid code unit sequences instead of returning
1868
   *                              an empty string. Using this flag is discouraged as it
1869
   *                              may have security implications.
1870
   *                              </td>
1871
   *                              </tr>
1872
   *                              <tr valign="top">
1873
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1874
   *                              <td>
1875
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1876
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1877
   *                              </td>
1878
   *                              </tr>
1879
   *                              <tr valign="top">
1880
   *                              <td><b>ENT_DISALLOWED</b></td>
1881
   *                              <td>
1882
   *                              Replace invalid code points for the given document type with a
1883
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1884
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1885
   *                              instance, to ensure the well-formedness of XML documents with
1886
   *                              embedded external content.
1887
   *                              </td>
1888
   *                              </tr>
1889
   *                              <tr valign="top">
1890
   *                              <td><b>ENT_HTML401</b></td>
1891
   *                              <td>
1892
   *                              Handle code as HTML 4.01.
1893
   *                              </td>
1894
   *                              </tr>
1895
   *                              <tr valign="top">
1896
   *                              <td><b>ENT_XML1</b></td>
1897
   *                              <td>
1898
   *                              Handle code as XML 1.
1899
   *                              </td>
1900
   *                              </tr>
1901
   *                              <tr valign="top">
1902
   *                              <td><b>ENT_XHTML</b></td>
1903
   *                              <td>
1904
   *                              Handle code as XHTML.
1905
   *                              </td>
1906
   *                              </tr>
1907
   *                              <tr valign="top">
1908
   *                              <td><b>ENT_HTML5</b></td>
1909
   *                              <td>
1910
   *                              Handle code as HTML 5.
1911
   *                              </td>
1912
   *                              </tr>
1913
   *                              </table>
1914
   *                              </p>
1915
   * @param string $encoding      [optional] <p>
1916
   *                              Defines encoding used in conversion.
1917
   *                              </p>
1918
   *                              <p>
1919
   *                              For the purposes of this function, the encodings
1920
   *                              ISO-8859-1, ISO-8859-15,
1921
   *                              UTF-8, cp866,
1922
   *                              cp1251, cp1252, and
1923
   *                              KOI8-R are effectively equivalent, provided the
1924
   *                              <i>string</i> itself is valid for the encoding, as
1925
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1926
   *                              the same positions in all of these encodings.
1927
   *                              </p>
1928
   * @param bool   $double_encode [optional] <p>
1929
   *                              When <i>double_encode</i> is turned off PHP will not
1930
   *                              encode existing html entities, the default is to convert everything.
1931
   *                              </p>
1932
   *
1933
   * @return string The converted string.
1934
   * </p>
1935
   * <p>
1936
   * If the input <i>string</i> contains an invalid code unit
1937
   * sequence within the given <i>encoding</i> an empty string
1938
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1939
   * <b>ENT_SUBSTITUTE</b> flags are set.
1940
   */
1941 1 View Code Duplication
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given; any other name is normalized first.
    $isFastPathEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isFastPathEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Everything else is delegated to the native function.
    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
1949
1950
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean.
    return \extension_loaded('iconv');
  }
1959
1960
  /**
   * Alias of "UTF8::decimal_to_chr()": the argument is forwarded unchanged.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed to "UTF8::decimal_to_chr()".</p>
   *
   * @return string <p>Whatever "UTF8::decimal_to_chr()" returns.</p>
   */
  public static function int_to_chr($int): string
  {
    return self::decimal_to_chr($int);
  }
1973
1974
  /**
   * Formats an integer as a prefixed hexadecimal code point, e.g. 65 => "U+0041".
   *
   * The hexadecimal part is produced by dechex() (lower-case digits) and is
   * left-padded with zeros to at least four digits.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
   *
   * @return string <p>The prefix followed by the zero-padded hexadecimal digits.</p>
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
1992
1993
  /**
   * Reports whether the "IntlChar" class exists.
   *
   * @return bool <p><strong>true</strong> if the class exists, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded(): bool
  {
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2002
2003
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded(): bool
  {
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2012
2013
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>The result of "UTF8::is_ascii()".</p>
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    return self::is_ascii($str);
  }
2028
2029
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of "UTF8::is_base64()".</p>
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64(string $str): bool
  {
    return self::is_base64($str);
  }
2044
2045
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str    <p>The value to check.</p>
   * @param bool  $strict [optional] <p>Passed through as the "$strict" argument.</p>
   *
   * @return bool <p>The result of "UTF8::is_binary()".</p>
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    return self::is_binary($str, $strict);
  }
2061
2062
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string to compare against the known byte order marks.</p>
   *
   * @return bool <p>The result of "UTF8::is_bom()".</p>
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    return self::is_bom($utf8_chr);
  }
2077
2078
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of "UTF8::is_html()".</p>
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    return self::is_html($str);
  }
2093
2094
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of "UTF8::is_json()".</p>
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    return self::is_json($str);
  }
2109
2110
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>The result of "UTF8::is_utf16()": false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16(string $str)
  {
    return self::is_utf16($str);
  }
2125
2126
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>The result of "UTF8::is_utf32()": false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32(string $str)
  {
    return self::is_utf32($str);
  }
2141
2142
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict [optional] <p>Passed through as the "$strict" argument.</p>
   *
   * @return bool <p>The result of "UTF8::is_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    return self::is_utf8($str, $strict);
  }
2158
2159
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * The empty string counts as ASCII. Otherwise every byte must be a printable
   * character (0x20-0x7E) or one of the explicitly allowed control bytes
   * 0x09, 0x0A, 0x0D, 0x10 and 0x13; any other byte makes the check fail.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // NOTE(review): \x10 and \x13 (DLE / DC3) sit next to \x0A and \x0D in the
    // whitelist; they look like decimal 10 / 13 written as hex. Confirm the
    // intent before changing, callers may rely on the current behavior.
    return \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
  }
2177
2178
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and re-encoded; it only counts as
   * base64 when the re-encoded value is identical to the input. The empty
   * string is never considered base64.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare against false explicitly: a truthiness check would wrongly
    // reject valid input whose decoded payload is the string "0" ("MA==").
    if ($decoded === false) {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
2191
2192
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input is cast to string and then classified as binary when
   * - it consists only of the characters "0" and "1", or
   * - more than 30% of its bytes are NUL bytes, or
   * - in strict mode with the "finfo" class available: finfo reports the
   *   MIME encoding "binary", or
   * - otherwise (non-strict, or "finfo" missing): it contains at least one NUL byte.
   *
   * An empty input is never binary.
   *
   * @param mixed $input  <p>The value to inspect; it is cast to string.</p>
   * @param bool  $strict [optional] <p>Use "finfo" (if available) instead of the plain NUL-byte test.</p>
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // a sequence of "0" / "1" characters is treated as a bit-string
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    // $data is non-empty here, so the division is safe
    $nullBytes = \substr_count($data, "\x0");
    if (($nullBytes / \strlen($data)) > 0.3) {
      return true;
    }

    if ($strict === true && \class_exists('finfo')) {
      $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($data);

      return $detectedEncoding === 'binary';
    }

    return $nullBytes > 0;
  }
2243
2244
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and runs the strict
   * "UTF8::is_binary()" check on them. A file that cannot be opened or read,
   * or that is empty, is reported as not binary.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    // init
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false (plus a warning) rather than
      // by throwing, so the handle has to be checked before it is used.
      $fp = \fopen($file, 'rb');
      if (\is_resource($fp)) {
        $block = \fread($fp, 512);
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    } finally {
      // always release the handle, even when reading failed
      if (\is_resource($fp)) {
        \fclose($fp);
      }
    }

    // "UTF8::is_binary()" returns false for empty input anyway; short-circuit here
    if ($block === false || $block === '') {
      return false;
    }

    return self::is_binary($block, true);
  }
2263
2264
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * The comparison is strict and against the whole input: the input has to be
   * exactly one of the keys of the internal BOM table.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str): bool
  {
    // the BOM strings are the keys of the table, the values are byte lengths
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2283
2284
  /**
   * Checks whether the string contains at least one HTML-like tag
   * (opening, closing or self-closing, with optional attributes).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string).</p>
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // a single match is enough, the matched text itself is not needed
    $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2304
2305
  /**
   * Try to check if "$str" is a JSON string.
   *
   * The string is decoded via "UTF8::json_decode()"; it counts as JSON only
   * when the decoded value is an object or an array and no JSON error was
   * recorded. An empty string is never JSON.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    $isContainer = \is_object($decoded) || \is_array($decoded);

    return $isContainer && \json_last_error() === JSON_ERROR_NONE;
  }
2328
2329
  /**
   * Check if the string is UTF-16.
   *
   * Only input that "UTF8::is_binary()" classifies as binary is examined.
   * After removing a BOM, the string is converted UTF-16 => UTF-8 => UTF-16 => UTF-8
   * for each byte order; when that round trip is stable, the byte order
   * collects one hit per entry of its character table that is found in the
   * table of the input. The byte order with more hits wins; a tie means "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16(string $str)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // filled lazily, the first time a byte order survives the round trip
    $strChars = [];
    $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (\array_keys($hits) as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() searches the *values* of $strChars while $char is a
      // key of the other table; confirm against count_chars() that this is intended.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
      return false;
    }

    return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
  }
2395
2396
  /**
   * Check if the string is UTF-32.
   *
   * Only input that "UTF8::is_binary()" classifies as binary is examined.
   * After removing a BOM, the string is converted UTF-32 => UTF-8 => UTF-32 => UTF-8
   * for each byte order; when that round trip is stable, the byte order
   * collects one hit per entry of its character table that is found in the
   * table of the input. The byte order with more hits wins; a tie means "not UTF-32".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32(string $str)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // filled lazily, the first time a byte order survives the round trip
    $strChars = [];
    $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (\array_keys($hits) as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() searches the *values* of $strChars while $char is a
      // key of the other table; confirm against count_chars() that this is intended.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
      return false;
    }

    return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
  }
2462
2463
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * The validation is a byte-wise state machine, so the result does not depend on
   * whether PCRE was compiled with UTF-8 support.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked, or an array of such strings (every element
   *                                must be valid).</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p><strong>true</strong> for valid UTF-8 (an empty string is valid), <strong>false</strong>
   *              otherwise.</p>
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // nothing to validate
    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // No PCRE "/u" shortcut is used here: a "/u" pattern cannot run on a PCRE build without
    // UTF-8 support, while the byte-wise check below works on every build.

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // presumably a "byte => ordinal" lookup table loaded from the data files
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or the
        // first octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded code point must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation octet of the
        // current multi-octet sequence.
        if (0x80 !== (0xC0 & $in)) {
          // Incomplete multi-octet sequence.
          return false;
        }

        // Legal continuation: merge its six payload bits into the code point.
        $shift = ($mState - 1) * 6;
        $mUcs4 |= ($in & 0x0000003F) << $shift;

        // End of the multi-octet sequence: mUcs4 now holds the final code point.
        if (0 === --$mState) {
          if (
              // From Unicode 3.1, non-shortest form is illegal.
              (2 === $mBytes && $mUcs4 < 0x0080) ||
              (3 === $mBytes && $mUcs4 < 0x0800) ||
              (4 === $mBytes && $mUcs4 < 0x10000) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal.
              (($mUcs4 & 0xFFFFF800) === 0xD800) ||
              // Code points outside the Unicode range are illegal.
              ($mUcs4 > 0x10FFFF)
          ) {
            return false;
          }

          // reset the state for the next character
          $mState = 0;
          $mUcs4 = 0;
          $mBytes = 1;
        }
      }
    }

    return true;
  }
2621
2622
  /**
   * Decodes a JSON string (wrapper around PHP's native "json_decode()").
   *
   * The input is passed through "UTF8::filter()" before it is handed to the native decoder.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.</p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options, e.g.
   *                        <b>JSON_BIGINT_AS_STRING</b>.</p>
   *
   * @return mixed <p>The decoded value in the appropriate PHP type, exactly as returned by the native
   *               "json_decode()" (<b>NULL</b> if the input cannot be decoded or is nested deeper than
   *               the recursion limit).</p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    return \json_decode(self::filter($json), $assoc, $depth, $options);
  }
2666
2667
  /**
   * Returns the JSON representation of a value (wrapper around PHP's native "json_encode()").
   *
   * The value is passed through "UTF8::filter()" before it is handed to the native encoder.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.</p>
   * @param int   $options [optional] <p>Bitmask of the JSON_* encode constants, e.g.
   *                       <b>JSON_PRETTY_PRINT</b> or <b>JSON_UNESCAPED_UNICODE</b>.</p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // No "string" return type on purpose: the native function reports failure with false,
    // and with strict types a declared "string" return would turn that into a TypeError.
    return \json_encode(self::filter($value), $options, $depth);
  }
2711
2712
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character; substr() may report false
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $loweredFirst = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $loweredFirst . ($rest === false ? '' : $rest);
  }
2736
2737
  /**
   * Alias of "UTF8::lcfirst()": lower-cases the first character of the given word.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word      <p>The input word.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
2752
2753
  /**
   * Lower-cases the first character of every word in the string.
   *
   * The string is split with "UTF8::str_to_words()", every non-empty piece that is not listed in
   * $exceptions is passed through "UTF8::lcfirst()", and the pieces are concatenated again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (exact match).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
   *                             word (handed to "UTF8::str_to_words()").</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" would also reject the string "0".
    if ($str === '') {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $result = '';

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Only really empty pieces are skipped; the word "0" must survive.
      $word = (string)$word;
      if ($word === '') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $result .= $word;
    }

    return $result;
  }
2802
2803
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. When omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (other/control) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left. If the regular expression
   *                engine reports an error (e.g. for invalid UTF-8), the input is returned unchanged.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy in PHP but is a perfectly valid character to strip, so it must not select the default.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '/^[\pZ\pC]+/u';
    } else {
      $pattern = '/^' . self::rxClass($chars) . '+/u';
    }

    // preg_replace() yields null on a PCRE error; keep the declared "string" return type intact.
    $trimmed = \preg_replace($pattern, '', $str);

    return $trimmed === null ? $str : $trimmed;
  }
2824
2825
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated
   *                   first).</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if the input yields
   *                no code points.</p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty list.
    if (\count($codepoints) === 0) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
2840
2841
  /**
   * Determines the largest byte length among the entries "UTF8::chr_size_list()" reports for the string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2858
2859
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2874
2875 1
  /**
   * Checks whether the "mbstring.func_overload" ini setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> if the MB_OVERLOAD_STRING constant is not defined or the
   *              bit is not set.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2881
2882
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated
   *                   first).</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string if the input yields
   *                no code points.</p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty list.
    if (\count($codepoints) === 0) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
2897
2898
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Returned as-is when $encoding is empty.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2914
2915
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input => $fallback, "UTF-8"/"UTF8" => "UTF-8", per-request cache,
   * exact match in the list of known encodings, and finally the alias table below. The alias
   * lookup ignores case and every non-alphanumeric character (including whitespace), so
   * "iso 8859-1", "ISO_8859_1" and "iso88591" all resolve to "ISO-8859-1". Unknown names are
   * returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    // fast path for the most common input
    if ('UTF-8' === $encoding || 'UTF8' === $encoding) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // already a known encoding name => keep it as it is
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Reduce the name to letters and digits only, so that every separator style
    // (dash, underscore, dot, space, ...) hits the same key in the alias table.
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (isset($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache under the name the caller used, so the next call returns early
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3032
3033
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table is replaced by its value via "str_replace()".
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // search / replace lists, built once per request
    static $search = null;
    static $replace = null;

    if ($search === null) {
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $search = \array_keys(self::$UTF8_MSWORD);
      $replace = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($search, $replace, $str);
  }
3061
3062
  /**
   * Normalize the whitespace.
   *
   * Every entry of the whitespace table is replaced by a plain space; unless requested otherwise,
   * the entries of the bidirectional-control table are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ($str === '') {
      return '';
    }

    // one prepared search list per "keep non-breaking space" mode (0 / 1)
    static $whitespaceByMode = [];
    static $bidiControls = null;

    $mode = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceByMode[$mode])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceByMode[$mode] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($whitespaceByMode[$mode], ' ', $str);
  }
3104
3105
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per "input character + encoding" for the lifetime of the request.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Default is UTF-8; input in any other encoding (except CP850)
   *                         is converted to UTF-8 first.</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $cache = [];

    // the cache key is built from the unconverted input
    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($cache[$cacheKey]) === true) {
      return $cache[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer the intl extension; a falsy answer falls through to the manual decoding below.
    if (self::$SUPPORT['intlChar'] === true) {
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $cache[$cacheKey] = $code;
      }
    }

    // Manual decoding: look at (at most) the first four bytes, indexed from 1 by unpack().
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to read)
      $code = $lead;
    }

    return $cache[$cacheKey] = $code;
  }
3170
3171
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike PHP's "parse_str()", this method never creates variables in the current scope;
   *          the parsed values are only written to $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or $result ends up
   *              empty.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3196
3197
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * The check runs an empty "/u" pattern against an empty subject with errors silenced.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return (bool)$matched;
  }
3207
3208
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: all-digit values are taken as decimal numbers,
   * other hexadecimal-looking values go through "UTF8::hex_to_int()", anything else through
   * "UTF8::ord()". An empty array is returned when a boundary is empty or resolves to 0.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // shared resolution of one boundary value to its code point
    $toCodePoint = static function ($boundary): int {
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map([self::class, 'chr'], \range($start, $end));
  }
3254
3255
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string stops changing).</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Turn "%uXXXX" escapes into hexadecimal HTML entities, so that the entity decoding
    // in the loop below takes care of them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // One pass = convert to UTF-8, decode entities, url-decode, repair simple UTF-8 damage.
    // With $multi_decode the pass is repeated until it no longer changes the string.
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3303
3304
  /**
   * Deprecated alias of "UTF8::remove_bom()"; the call is forwarded unchanged.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3319
3320
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM string => byte length) is tested in turn against the
   * beginning of the string; each one that matches is cut off before the next entry is tested.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bom => $byteLength) {
      // NOTE(review): 'CP850' is presumably passed so that positions and lengths are counted in bytes - confirm.
      if (self::strpos($str, $bom, 0, 'CP850') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $byteLength, null, 'CP850');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
3345
3346
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a needle is collapsed into one single occurrence.
   * Needles are processed one after another, each on the result of the previous one.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // An empty needle cannot be duplicated, there is nothing to collapse.
        if ($item === '') {
          continue;
        }

        // The needle is returned from a callback instead of being used as replacement string:
        // a replacement string would expand sequences like "$0" or "\0" found in the needle
        // as back-references, which would put the duplicates straight back.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3369
3370
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form (e.g. "%00") of these characters.</p>
   * @param string $replacement <p>The string that is inserted in place of every removed match.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    $patterns = [];

    if ($url_encoded) {
      // Hexadecimal digits of a percent-encoded byte may be written in upper or lower case
      // ("%0B" and "%0b" are the same byte), hence the "i" modifier.
      $patterns[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $patterns[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Removing a match can join its neighbours into a new match (e.g. "%%0000"),
    // so the replacement is repeated until a pass changes nothing anymore.
    do {
      $str = (string)\preg_replace($patterns, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3403
3404
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement; an empty string removes the characters.</p>
   * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 byte sequences.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // mb_substitute_character() only accepts a code point (int) or one of the keywords
      // "none" / "long" / "entity"; an arbitrary replacement string is rejected. Invalid
      // sequences are therefore first turned into U+FFFD, which the str_replace() below maps
      // to the requested replacement (of any length, including the empty string).
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      try {
        $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // The substitute character is a global mbstring setting: always restore it.
        \mb_substitute_character($previousSubstitute);
      }

      $str = \is_string($converted) ? $converted : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD.
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
3453
3454
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars (INF or any falsy value except the string "0") all trailing separator and
   * "other" characters (Unicode categories Z and C) are stripped.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>
   *                The string with unwanted characters stripped from the right. If the regular
   *                expression engine rejects the input (e.g. malformed UTF-8), the string is
   *                returned unchanged.
   *                </p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy in PHP, but it is a perfectly valid character to strip.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      $pattern = '/' . self::rxClass($chars) . '+$/u';
    }

    // preg_replace() returns null on failure, which must not leak out of a method that
    // is declared to return a string.
    $trimmed = \preg_replace($pattern, '', $str);

    return $trimmed === null ? $str : $trimmed;
  }
3475
3476
  /**
   * Builds a regular expression fragment that matches any one of the given characters.
   *
   * The input is split via self::str_split(). Pieces of at most two bytes are escaped and put
   * into a character class (a "-" is moved to the front of the class), longer pieces for which
   * self::strlen() reports 1 are put into the class as they are, and all remaining pieces become
   * alternatives next to the class. Results are cached per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>Either the character class alone or a non-capturing group of alternatives.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $cache = [];

    $cacheKey = $s . $class;

    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    $charClass = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $piece) {
      if ($piece === '-') {
        // a leading "-" is taken literally inside a character class
        $charClass = '-' . $charClass;
        continue;
      }

      if (!isset($piece[2])) {
        // one or two bytes long
        $charClass .= \preg_quote($piece, '/');
        continue;
      }

      if (self::strlen($piece) === 1) {
        $charClass .= $piece;
        continue;
      }

      $alternatives[] = $piece;
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if ($alternatives === []) {
      $pattern = $charClass;
    } else {
      $pattern = '(?:' . \implode('|', \array_merge([$charClass], $alternatives)) . ')';
    }

    $cache[$cacheKey] = $pattern;

    return $pattern;
  }
3524
3525
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of self::$SUPPORT as "key - value" inside a "pre"-block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $lines = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $lines .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $lines . '</pre>';
  }
3540
3541
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // 'UTF-8' and 'CP850' are used as given, every other name is normalized first.
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3570
3571
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = [];

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "." with the "u" and "s" modifiers matches one character at a time, newlines included
      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the UTF-8 byte sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // 0xxxxxxx: single byte character
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // number of continuation bytes announced by the lead byte
        if (($lead & "\xE0") === "\xC0") {
          $extra = 1; // 110xxxxx
        } elseif (($lead & "\xF0") === "\xE0") {
          $extra = 2; // 1110xxxx
        } elseif (($lead & "\xF8") === "\xF0") {
          $extra = 3; // 11110xxx
        } else {
          continue; // not a valid lead byte: skipped
        }

        // truncated sequence: the lead byte is skipped
        if (!isset($str[$pos + $extra])) {
          continue;
        }

        $sequence = $lead;
        for ($k = 1; $k <= $extra; $k++) {
          // every continuation byte has to look like 10xxxxxx, otherwise the lead byte is skipped
          if (($str[$pos + $k] & "\xC0") !== "\x80") {
            continue 2;
          }
          $sequence .= $str[$pos + $k];
        }

        $chars[] = $sequence;
        $pos += $extra;
      }
    }

    if ($length > 1) {
      $chunks = \array_chunk($chars, $length);

      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          }, $chunks
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3688
3689
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary content (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list, and finally an "iconv()" round trip
   * for every encoding name in self::$ENCODINGS.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // The loaded data is only iterated if it really is a list of encoding names.
    if (\is_array(self::$ENCODINGS) === true) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // iconv() returns false for encodings it cannot handle; the strict comparison covers
        // that case as well as a round trip that dropped or changed bytes.
        if ($roundTrip === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
3807
3808
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is byte-wise and case sensitive; an empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the last bytes of the haystack, as many as the needle is long
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3828
3829
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the last bytes of the haystack, as many as the needle is long
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3849
3850
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Needles and replacements are both taken literally, like the native function does.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty needle never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), inserted as they are.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match, so that an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() expands "$n" and "\n" in the replacement as back-references.
    // A backslash in front of "\" and "$" makes it insert these characters literally.
    $escape = static function ($value): string {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
3893
3894
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::stripos($haystack, $needle) === 0;
  }
3914
3915
  /**
   * Limit the number of characters in a string without cutting the last word in two.
   *
   * A string that is not longer than $length is returned as it is. Otherwise it is cut at
   * $length, everything from the last space on is dropped and $strAddOn is appended. If no
   * text is left in front of that space, the string is cut at "$length - 1" instead.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum length.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a word: drop the space and be done
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = (string)self::substr($str, 0, $length);

    // everything in front of the last space (the space is ASCII, so byte functions are safe here)
    $lastSpace = \strrpos($cut, ' ');
    $wholeWords = ($lastSpace === false) ? '' : \substr($cut, 0, $lastSpace);

    if ($wholeWords === '') {
      return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
    }

    return $wholeWords . $strAddOn;
  }
3951
3952
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                The padded string. The input is returned unchanged if it is already long
   *                enough, or if the padding string is empty.
   *                </p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $strLength = (int)self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $strLength) {
      return $str;
    }

    $padStringLength = (int)self::strlen($pad_string);
    $missing = $pad_length - $strLength;

    // Nothing to add, or nothing to add it with (this also avoids a division by zero below).
    if ($missing === 0 || $padStringLength === 0) {
      return $str;
    }

    // Whole numbers only: for STR_PAD_BOTH an odd difference puts the extra character on the right.
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $leftLength = $missing;
        $rightLength = 0;
        break;

      case STR_PAD_BOTH:
        $leftLength = \intdiv($missing, 2);
        $rightLength = $missing - $leftLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $leftLength = 0;
        $rightLength = $missing;
    }

    // Repeats the padding string often enough and cuts the result to exactly $length characters.
    $buildPadding = static function (int $length) use ($pad_string, $padStringLength): string {
      if ($length === 0) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($length / $padStringLength));

      return (string)self::substr($repeated, 0, $length);
    };

    return $buildPadding($leftLength) . $str . $buildPadding($rightLength);
  }
4006
4007
  /**
   * Repeat a string.
   *
   * The string is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
4031
4032
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // all arguments, including the by-reference counter, go straight to the native function
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4065
4066
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The term that is put in its place.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first match replaced, or the untouched subject if there is no match.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
4085
4086
  /**
   * Shuffles all the characters in the string.
   *
   * The string is taken apart via self::split(), the pieces are shuffled and glued together again.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $characters = self::split($str);

    \shuffle($characters);

    return \implode('', $characters);
  }
4101
4102
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves exactly one entry per distinct code point
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
  }
4127
4128
  /**
   * Split a string (or every string of an array) into an array of chunks.
   *
   * The string is cut with the GRAPHEME_CLUSTER_RX pattern; "$len" of the
   * matched pieces are joined into one chunk.
   *
   * @param string|string[] $str <p>The input string, or an array of strings that is processed element by element.</p>
   * @param int             $len <p>Number of matched pieces per chunk.</p>
   *
   * @return array <p>The chunks; an empty array for an empty string.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    // arrays are handled recursively, keeping their keys
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if (!isset($str[0])) {
      return [];
    }

    if ($len < 1) {
      // invalid chunk length: delegate to the native function
      return \str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue every run of $len consecutive pieces together
    $chunks = [];
    foreach (\array_chunk($clusters, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
4176
4177
  /**
   * Check whether a string begins with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring expected at the very start.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not start with the needle.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    return \strpos($haystack, $needle) === 0;
  }
4197
4198
  /**
   * Get a binary ("0"/"1") representation of a specific string.
   *
   * The bytes of the string are read as one big number; leading zero bits
   * are not part of the result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The bits of the input, "0" for an empty or all-zero input.</p>
   */
  public static function str_to_binary(string $str): string
  {
    $unpacked = \unpack('H*', $str);
    $hex = (string)$unpacked[1];

    // Convert digit by digit instead of via \base_convert(): that function
    // works on floats internally and silently loses precision for anything
    // longer than a few bytes.
    $nibbleToBits = [
        '0' => '0000', '1' => '0001', '2' => '0010', '3' => '0011',
        '4' => '0100', '5' => '0101', '6' => '0110', '7' => '0111',
        '8' => '1000', '9' => '1001', 'a' => '1010', 'b' => '1011',
        'c' => '1100', 'd' => '1101', 'e' => '1110', 'f' => '1111',
    ];

    // drop leading zeros to keep the format \base_convert() produced
    $bits = \ltrim(\strtr($hex, $nibbleToBits), '0');

    return $bits === '' ? '0' : $bits;
  }
4211
4212
  /**
   * Convert a string into an array of words.
   *
   * The string is split with a capturing pattern, so the result alternates
   * between the text between words (even indexes) and the words themselves
   * (odd indexes) unless a filter below removes entries.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Drop entries that are empty after trimming.</p>
   * @param null|int $removeShortValues <p>Drop entries whose length is less than or equal to this value.</p>
   *
   * @return array
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $filterShort = $removeShortValues !== null;

    // no filter requested: return the raw split result
    if (!$filterShort && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = [];
    foreach ($parts as $part) {
      if ($filterShort && self::strlen($part) <= $removeShortValues) {
        continue;
      }

      if ($removeEmptyValues === true && \trim($part) === '') {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4267
4268
  /**
   * Alias of "UTF8::to_ascii()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4283
4284
  /**
   * Count the words of a UTF-8 string, or list them.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return the number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words, keyed by the offset of each word
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words, or the words themselves (see $format).</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // words sit at the odd indexes, the text between them at the even ones
    $strParts = self::str_to_words($str, $charlist);
    $len = \count($strParts);

    if ($format !== 1 && $format !== 2) {
      return ($len - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    // $format === 2: track the running character offset of every word
    $offset = self::strlen($strParts[0]);
    for ($i = 1; $i < $len; $i += 2) {
      $words[$offset] = $strParts[$i];
      $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
    }

    return $words;
  }
4327
4328
  /**
   * Case-insensitive string comparison.
   *
   * INFO: both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4346
4347
  /**
   * Alias of "UTF8::strstr()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4364
4365
  /**
   * Case-sensitive string comparison.
   *
   * Both strings are normalized to NFD before the byte-wise comparison, so
   * differently composed forms of the same text compare as equal.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    // identical bytes need no normalization
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() returns false for input it cannot handle (e.g. invalid UTF-8);
    // fall back to the raw string instead of handing false to \strcmp(), which
    // would raise a TypeError under strict_types
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
4385
4386
  /**
   * Find the length of the initial segment that contains none of the chars of "$charList".
   *
   * @param string $str
   * @param string $charList <p>The chars that end the segment.</p>
   * @param int    $offset   <p>Start position; applied via UTF8::substr().</p>
   * @param int    $length   <p>Length of the part to examine; applied via UTF8::substr().</p>
   *
   * @return int|null <p>The segment length, or <strong>null</strong> if the char list or the examined string is empty.</p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // narrow the string down to the requested window first
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }

      $str = (string)$window;
    }

    if (!isset($str[0])) {
      return null;
    }

    // group 1 lazily captures everything before the first char of the list
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // no listed char found at all: the whole string is the segment
    return self::strlen($str);
  }
4420
4421
  /**
   * Alias of "UTF8::stristr()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4438
4439
  /**
   * Build a string from a list of code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>The concatenation of UTF8::chr() applied to every entry.</p>
   */
  public static function string(array $array): string
  {
    // map every code point to its character ...
    $chars = \array_map([self::class, 'chr'], $array);

    // ... and join them in the given order
    return \implode('', $chars);
  }
4461
4462
  /**
   * Check whether a string starts with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string begins with a BOM, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    // only the keys (the BOM byte sequences) matter here, not their byte lengths
    foreach (\array_keys(self::$BOM) as $bomString) {
      if (\strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4479
4480
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning the string via UTF8::clean() first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>The input string.</p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped; passed on to the native strip_tags().
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4512
4513
  /**
   * Remove every run of characters matched by the "[[:space:]]" class
   * (evaluated in UTF-8 mode) from the string.
   *
   * @param string $str
   *
   * @return string <p>The string without whitespace.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on failure; the cast turns that into ''
    return (string)$stripped;
  }
4530
4531
  /**
   * Find the position of the first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string to search in.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;

    if (!$useIntl) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
4579
4580
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a plain "!$needle" would also treat the valid needle "0" as empty.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything before the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4655
4656
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        $plainStrlenIsSafe = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;
        if ($plainStrlenIsSafe) {
          return \strlen($str);
        }

        return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      // neither extension is available for a non UTF-8 encoding: warn, then try the fallbacks
      if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      // only iconv is available
      if (self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $parts);
    $length = \count($parts[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4761
4762
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str
   *
   * @return int
   */
  public static function strlen_in_byte(string $str): int
  {
    // with "mbstring.func_overload" active, measure via an 8-bit encoding instead of plain strlen()
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
4779
4780
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strnatcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4796
4797
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical input needs no folding
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4815
4816
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4833
4834
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to "$len" characters via UTF8::substr() and then compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a "false" from UTF8::substr() into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
4854
4855
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    if (!\preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $start = \strpos($haystack, $m[0]);

    return \substr($haystack, $start);
  }
4877
4878
  /**
   * Find position of first occurrence of string in a string.
   *
   * The available extensions are tried in turn (mbstring, intl, iconv); a
   * pure PHP implementation is used when none of them produced a position.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // NOTE: the former "integer needle" conversion was removed; $needle is
    // always a string here, so "(int)$needle === $needle" could never be true.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // a "false" result falls through to the remaining strategies
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // Translate a negative offset into the equivalent position counted
      // from the start, so the result below stays relative to the whole
      // haystack and not to the sliced tail.
      $offset += self::strlen($haystack);
      if ($offset < 0) {
        $offset = 0;
      }
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position inside the slice ...
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position inside the original haystack
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5017
5018
  /**
   * Returns a portion of a string, relative to the last occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return the part of haystack before the last occurrence of needle.<br>
   *                              <b>false</b> (default): return the part of haystack from the last occurrence
   *                              of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from needle and haystack first.</p>
   *
   * @return string|false <p>The portion of haystack or<br><b>false</b> if needle is not found.</p>
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // only unknown encoding names need to be normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // sanitize both inputs before they are handed to the "mb_"-function
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5057
5058
  /**
   * Reverses the order of the characters in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>
   *                The pieces returned by self::split() joined in reverse order,<br>
   *                or an empty string if the input is empty.
   *                </p>
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = \array_reverse($chars);

    return \implode('', $reversed);
  }
5073
5074
  /**
   * Returns a portion of a string, relative to the last case-insensitive occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string to search in.</p>
   * @param string $needle         <p>The string to search for.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return the part of haystack before the last occurrence of needle.<br>
   *                               <b>false</b> (default): return the part of haystack from the last occurrence
   *                               of needle to the end.
   *                               </p>
   * @param string $encoding       [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from needle and haystack first.</p>
   *
   * @return string|false <p>The portion of haystack or<br><b>false</b> if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "UTF-8" and "CP850" are used as-is, every other name is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // sanitize both inputs before they are handed to the "mb_"-function
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5112
5113
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Uses "\mb_strripos()" when mbstring is available, then "\grapheme_strripos()" (UTF-8 + intl only)
   * and finally an upper-cased comparison via self::strrpos().
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so there is no "code point as int" handling here
    //       (an "(int)$needle === $needle" check can never be true for a string)

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased strings case-sensitively

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5175
5176
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Uses "\mb_strrpos()" when mbstring is available, then "\grapheme_strrpos()" (UTF-8 + intl only)
   * and finally a byte-wise "\strrpos()" whose result is converted into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. "null" (default) is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative int is treated as a code point and converted into its character
    if (\is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    // "null" means "no offset": normalize it, so that the strictly typed
    // "\mb_strrpos()" / "\grapheme_strrpos()" calls below never receive null
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters, the offset is added to the result again below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut off the last characters, positions in the remaining string are already absolute
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5266
5267
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>If set (together with / or $length), only that part of $str is examined.</p>
   * @param int    $length [optional] <p>If set (together with / or $offset), only that part of $str is examined.</p>
   *
   * @return int <p>The length of the matching initial segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    $needsSlice = ($offset !== 0 || $length !== null);
    if ($needsSlice) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the leading run of chars from the mask
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5294
5295
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Uses "\mb_strstr()" when mbstring is available, then "\grapheme_strstr()" (UTF-8 + intl only)
   * and finally a regex based fallback.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or haystack / needle is empty).
   *                      </p>
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // sanitize both inputs before any of the search functions sees them
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
    \preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $beforeNeedle = $match[1];

    return $before_needle ? $beforeNeedle : self::substr($haystack, self::strlen($beforeNeedle));
  }
5362
5363
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally replace the "caseFolding_full" data (default)<br>
   *                           <b>false</b>, use only the limited static array [self::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded and lower-cased string, or an empty string for empty input.</p>
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // per-request caches for the replacement tables
    static $COMMON_CASE_FOLD_KEYS_CACHE = null;
    static $COMMON_CASE_FOLD_VALUES_CACHE = null;
    static $FULL_CASE_FOLD_CACHE = null;

    if ($COMMON_CASE_FOLD_KEYS_CACHE === null) {
      $COMMON_CASE_FOLD_KEYS_CACHE = \array_keys(self::$COMMON_CASE_FOLD);
      $COMMON_CASE_FOLD_VALUES_CACHE = \array_values(self::$COMMON_CASE_FOLD);
    }

    // step 1: the common (limited) case folding
    $folded = (string)\str_replace($COMMON_CASE_FOLD_KEYS_CACHE, $COMMON_CASE_FOLD_VALUES_CACHE, $str);

    // step 2: the full case folding, the data is only loaded on first use
    if ($full) {
      if ($FULL_CASE_FOLD_CACHE === null) {
        $FULL_CASE_FOLD_CACHE = self::getData('caseFolding_full');
      }

      // index 0 holds the search list, index 1 the replacements
      $folded = (string)\str_replace($FULL_CASE_FOLD_CACHE[0], $FULL_CASE_FOLD_CACHE[1], $folded);
    }

    if ($cleanUtf8) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
5409
5410
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>
   *                               Set the language for special cases: az, el, lt, tr<br>
   *                               Needs "intl", otherwise a warning is triggered and the
   *                               language-agnostic conversion is used.
   *                               </p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        // "transliterator_transliterate()" returns false on failure, which must not
        // leave this method because of the "string" return type
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // transliteration failed: use the language-agnostic conversion below

      } else {
        \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtolower($str, $encoding);
  }
5463
5464
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>
   *                The transformed string,<br>
   *                or an empty string if the normalization or the replacement fails.
   *                </p>
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if (!\is_string($decomposed)) {
      // normalization failed: there is nothing valid to strip the marks from
      return '';
    }

    $folded = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    // "preg_replace()" returns null on error, which would violate the "string" return type
    return $folded === null ? '' : $folded;
  }
5476
5477
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>
   *                               Set the language for special cases: az, el, lt, tr<br>
   *                               Needs "intl", otherwise a warning is triggered and the
   *                               language-agnostic conversion is used.
   *                               </p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        // "transliterator_transliterate()" returns false on failure, which must not
        // leave this method because of the "string" return type
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // transliteration failed: use the language-agnostic conversion below

      } else {
        \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtoupper($str, $encoding);
  }
5529
5530
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>
   *                              The string being translated to.<br>
   *                              If omitted: a string $from is removed from $str, every other $from is
   *                              handed to "\strtr()" as replace-pairs.
   *                              </p>
   *
   * @return string <p>
   *                A copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to translate
    if ($from === $to) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only the pairs that exist on both sides are used
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
5575
5576
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The width as reported by "\mb_strwidth()".</p>
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // "UTF-8" and "CP850" are used as-is, every other name is normalized first
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
5600
5601
  /**
5602
   * Get part of a string.
5603
   *
5604
   * @link http://php.net/manual/en/function.mb-substr.php
5605
   *
5606
   * @param string $str       <p>The string being checked.</p>
5607
   * @param int    $offset    <p>The first position used in str.</p>
5608
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
5609
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5610
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5611
   *
5612
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
5613
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
5614
   *                      characters long, <b>FALSE</b> will be returned.</p>
5615
   */
5616 71
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($str[0])) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // The character count is only needed for the offset checks below and as
    // the default length; with a zero offset and an explicit length it stays 0.
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: the offset points right behind the last character and no
    // length was given ("$length === 0" was already handled above, so a strict
    // NULL check is all that is needed here).
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible: the offset lies behind the end of the string
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut with the native byte-based "substr()",
    // as long as "mbstring.func_overload" is not active
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // pure ASCII can be cut byte-wise
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5720
5721
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * INFO: If an offset or a length is given, the main string is reduced to the requested part first
   *       and the secondary string is then cut to the same number of characters before comparing.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $needsSlicing = ($offset !== 0 || $length !== null);

    if ($needsSlicing) {
      // cut the requested window out of the main string ...
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      // ... and shorten the secondary string to the same number of characters
      $otherPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($otherPart === false) ? '' : (string)$otherPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5765
5766
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If omitted, the length of the haystack (measured in
   *                           the given encoding) is used.
   *                           </p>
   * @param string $encoding   <p>Set the charset.</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the same encoding that is used for the
        // "substr()" call below, otherwise both would disagree for non UTF-8 input
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in $haystack or $needle would lead to wrong results
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the (non-overlapping) matches via PCRE
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5848
5849
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), ignoring the case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it doesn't start with it.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack doesn't begin with it
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5877
5878
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), ignoring the case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it doesn't end with it.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack doesn't end with it
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
5906
5907
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it doesn't start with it.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack doesn't begin with it
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5935
5936
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given (and $str is a string), it defaults
   *                                          to the length of the string; i.e. end the replacing at the end of
   *                                          string. If length is zero then this function will have the effect of
   *                                          inserting replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: at most one entry per input string
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: entries that are not integers fall back to 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $count);
        foreach ($offset as $key => $value) {
          $offset[$key] = ((int)$value === $value) ? $value : 0;
        }
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: NULL entries become 0, other non-integers become the number of strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $count);
        foreach ($length as $key => $value) {
          if ($value === null) {
            $length[$key] = 0;
          } elseif ((int)$value !== $value) {
            $length[$key] = $count;
          }
        }
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, once per input string
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first replacement
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // an ASCII-only input can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice on character level
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
6037
6038
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it doesn't end with it.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack doesn't end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6066
6067
  /**
   * Returns a case swapped version of the string.
   *
   * INFO: If the string can't be processed by PCRE (e.g. because it contains invalid UTF-8
   *       and $cleanUtf8 is not set), the string is returned unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the regex below uses the "u" modifier and fails on invalid UTF-8
      $str = self::clean($str);
    }

    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          // already upper case (or without case): try the lower case variant
          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns NULL on error, which would violate
    // the declared return type: hand back the input unchanged instead
    if ($strSwappedCase === null) {
      return $str;
    }

    return $strSwappedCase;
  }
6108
6109
  /**
   * Deprecated camelCase alias, forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6126
6127
  /**
   * Deprecated camelCase alias, forwards to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6142
6143
  /**
   * Deprecated camelCase alias, forwards to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6158
6159
  /**
   * Deprecated camelCase alias, forwards to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6174
6175
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; every remaining non-ASCII character is then looked up in the
   * transliteration tables (loaded per "bank" of 256 code points via "getData()") and replaced by
   * its ASCII representation, or by $unknown if there is no entry for it.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache of the already loaded transliteration tables, indexed by bank
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns FALSE on failure,
        // in that case we go on with the un-transliterated string
        if (\is_string($transliterated) === true) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // a stray continuation byte (128-191) or a byte that
      // never starts a sequence (254, 255) can't be decoded
      if ($ordC0 < 192 || $ordC0 > 253) {
        $c = $unknown;
        continue;
      }

      // the lead byte tells how many bytes belong to the sequence
      if ($ordC0 <= 223) {
        $leadBase = 192;
        $byteCount = 2;
      } elseif ($ordC0 <= 239) {
        $leadBase = 224;
        $byteCount = 3;
      } elseif ($ordC0 <= 247) {
        $leadBase = 240;
        $byteCount = 4;
      } elseif ($ordC0 <= 251) {
        $leadBase = 248;
        $byteCount = 5;
      } else {
        $leadBase = 252;
        $byteCount = 6;
      }

      // decode the code point: it is computed from scratch for every
      // character, so a value from a previous iteration can never be reused
      $ord = $ordC0 - $leadBase;
      for ($i = 1; $i < $byteCount; $i++) {
        $ord = $ord * 64 + (self::$ORD[$c[$i]] - 128);
      }

      // the tables are split into banks of 256 code points each
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return \implode('', $chars);
  }
6334
6335
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * INFO: Arrays are converted element by element (recursively), the keys are preserved.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // "array_map()" with a single array keeps the keys
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    return ($str === '') ? '' : self::utf8_decode($str);
  }
6359
6360
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6373
6374
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      // convert every element (recursively), the keys are preserved
      return \array_map(
          function ($item) use ($decodeHtmlEntityToUtf8) {
            return UTF8::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $buf = '';

    for ($i = 0; $i < $max; $i++) {
      $byte = $str[$i];

      // 0x00 - 0x7F: it doesn't need conversion
      if ($byte < "\x80") {
        $buf .= $byte;
        continue;
      }

      // 0x80 - 0xBF: a continuation byte without a lead byte - needs conversion
      if ($byte < "\xC0") {
        $buf .= self::to_utf8_convert($byte);
        continue;
      }

      // 0xC0 and above: should be converted to UTF8, if it's not UTF8 already;
      // the lead byte tells how many continuation bytes have to follow
      if ($byte <= "\xDF") {
        $tail = 1; // looks like 2 bytes UTF8
      } elseif ($byte <= "\xEF") {
        $tail = 2; // looks like 3 bytes UTF8
      } elseif ($byte <= "\xF7") {
        $tail = 3; // looks like 4 bytes UTF8
      } else {
        $tail = 0; // doesn't look like UTF8, but should be converted
      }

      $sequence = $byte;
      $looksLikeUtf8 = ($tail > 0);
      for ($k = 1; $looksLikeUtf8 && $k <= $tail; $k++) {
        // bytes behind the end of the string count as "\x00"
        $next = ($i + $k >= $max) ? "\x00" : $str[$i + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $i += $tail;
      } else { // not valid UTF8 - convert it
        $buf .= self::to_utf8_convert($byte);
      }
    }

    // decode unicode escape sequences
    $buf = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6482
6483
  /**
   * Converts one byte that is not valid UTF-8 into a UTF-8 string.
   *
   * The byte's ordinal is looked up in the Windows-1252 special-case table first;
   * if there is no entry, a two-byte UTF-8 sequence is assembled from the byte.
   *
   * @param string $int <p>
   *                    A single byte. Despite its name this is not an integer: it is used
   *                    as a key of the "ord" lookup table and as an operand of string
   *                    bitwise operations.
   *                    </p>
   *
   * @return string <p>The UTF-8 replacement for the given byte.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // The division yields a float for most ordinals; cast explicitly so that the
    // array offset is an integer (implicit float-to-int offsets are deprecated).
    $cc1 = self::$CHR[(int)($ordC1 / 64)] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6516
6517
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original function if the string / chars are 7-bit (ASCII) only,
   * but the check for ASCII costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>
   *                      Optional characters to be stripped. When omitted (INF) or falsy,
   *                      Unicode separators (\pZ) and "other" characters (\pC) are stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      $trimmed = \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);

      // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8 together with
      // the "u" modifier); returning null would violate the declared string return type,
      // so fall back to the untouched input.
      return $trimmed ?? $str;
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6543
6544
  /**
   * Upper-cases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>The charset used for the substring and upper-case operations.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character
    $tail = self::substr($str, 1, null, $encoding);

    // the first character, upper-cased
    $head = self::strtoupper((string)self::substr($str, 0, 1, $encoding), $encoding, $cleanUtf8);

    // substr() signals "nothing left" with false
    return $head . ($tail === false ? '' : $tail);
  }
6574
6575
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The word whose first character is upper-cased.</p>
   * @param string $encoding  [optional] <p>The charset passed on to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    return self::ucfirst($word, $encoding, $cleanUtf8);
  }
6590
6591
  /**
   * Uppercase for all words in the string.
   *
   * Plain ASCII input without exceptions / charlist is delegated to PHP's native
   * "ucwords()"; everything else is split via "UTF8::str_to_words()" and every
   * word that is not listed in $exceptions is passed through "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are left as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare with '' explicitly: the string "0" is falsy, but it is valid input
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    $usePhpDefaultFunctions = !(bool)($charlist . \implode('', $exceptions));

    if ($usePhpDefaultFunctions === true && self::is_ascii($str) === true) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty chunks, but keep the word "0" (falsy in PHP)
      if (!$word && $word !== '0') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6659
6660
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Repeat the decoding until the string no longer changes.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as numeric html entities, so the entity decoder below handles them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      // remember the input of this pass, to detect when nothing changes anymore
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
6708
6709
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * Keys are the percent-encoded bytes "%20" - "%FF", values are the characters
   * these bytes stand for in Windows-1252, as UTF-8 strings.
   *
   * Invisible characters (DEL, no-break space, soft hyphen) are written as escape
   * sequences so that they cannot get lost when the file is edited or copied.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DEL
        '%80' => '€', // was mapped to "`", which is already the value of "%60"
        '%81' => '', // unassigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // unassigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // unassigned in Windows-1252
        '%90' => '', // unassigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // unassigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6945
6946
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is rewritten in place, byte by byte: a two-byte sequence whose code point
   * is below 256 becomes a single byte, every other multi-byte sequence becomes "?",
   * and all remaining bytes are copied as they are.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars [optional] <p>
   *                              If true, the string from before the byte-wise decoding is
   *                              returned whenever the decoded result is not shorter than
   *                              that string (both measured with "UTF8::strlen()").
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the keys of the table are the search terms and its values the
    // replacements, i.e. the same direction "UTF8::utf8_encode()" uses; a decoder would
    // presumably need the reverse mapping - confirm against the "win1252_to_utf8" data.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position; $j never overtakes $i,
    // so no byte is overwritten before it has been read
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence: there is no continuation byte left to read
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one byte more than for 3 bytes
          ++$i;
          // no break - fall through on purpose
        case "\xE0":
          // 3 (or 4) byte sequences cannot be represented in a single byte
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes hold decoded output
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7027
7028
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After PHP's own "utf8_encode()" has run, the result is passed through the
   * "win1252_to_utf8" replacement table, but only if it contains a "\xC2" byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte the replacement table is not applied
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are built once and reused by later calls
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
7068
7069
  /**
   * Fix utf8-win1252 chars: forwards the string to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    return self::fix_simple_utf8($str);
  }
7082
7083
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               All known whitespace characters as values, with the type of whitespace
   *               (as defined in the URL above) as keys.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    return self::$WHITESPACE_TABLE;
  }
7099
7100
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string has more words than
   * allowed, it is cut after the last allowed word, trailing whitespace is removed and
   * $strAddOn is appended; otherwise the string is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Appended to the result if the string was cut.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 to $limit words with their trailing whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    // the string was cut only if there is a match and the match is shorter than the input
    if (isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7131
7132
  /**
   * Wraps a string to a given number of characters
   *
   * The work is done by PHP's native "wordwrap()" on a single-byte "mask" of the input:
   * every character becomes "?" (or " " for a space) and every existing break becomes "#".
   * The break positions found in the wrapped mask are then mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    $mask = '';       // one byte per entry of $units
    $units = [];      // the characters (and existing breaks) of the input, in order
    $segments = \explode($break, $str);
    $segmentCount = \count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $segmentCount; ++$i) {

      // every segment but the first one was preceded by a break
      if ($i) {
        $units[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$i];
      unset($segments[$i]);

      foreach (self::split($segment) as $char) {
        $units[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $j = 0;
    $b = $i = -1;
    $mask = \wordwrap($mask, $width, '#', $cut);

    // walk over all break markers of the wrapped mask
    while (false !== $b = self::strpos($mask, '#', $b + 1)) {

      // copy the characters in front of the break
      for (++$i; $i < $b; ++$i) {
        $wrapped .= $units[$j];
        unset($units[$j++]);
      }

      // a break that replaced an original break or a space consumes that unit
      if ($break === $units[$j] || ' ' === $units[$j]) {
        unset($units[$j++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $units);
  }
7196
7197
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return array <p>The numeric code point as key and the White Space Character as value.</p>
   */
  public static function ws(): array
  {
    return self::$WHITESPACE;
  }
7206
7207
}
7208