Completed
Push — master ( 6efa12...2e7c50 )
by Lars
03:47
created

UTF8::isBase64()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // The three BOM bytes read as WINDOWS-1252 ("ï", "»", "¿") and stored as UTF-8: 3 chars, 6 bytes.
      // Written as byte escapes so that the key cannot be lost or altered by editors / tooling.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): for a byte length of 6 the two leading characters of the next key have to be
      // single-byte characters (presumably NUL) - confirm against the upstream file.
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): same caveat as above for the two trailing characters of the next key.
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 16
  public function __construct()
  {
    // Creating an instance only has to make sure that the environment
    // feature detection was executed.
    self::checkForSupport();
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219
   * @return string <p>Single Multi-Byte character.</p>
220
   */
221 2
  public static function access(string $str, int $pos): string
  {
    // Neither an empty string nor a negative position can address a character.
    if ($pos < 0 || !isset($str[0])) {
      return '';
    }

    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string <p>The output string that contains BOM.</p>
242
   */
243 1
  public static function add_bom_to_string(string $str): string
  {
    // A string that already starts with a BOM is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
253
   * Changes all keys in an array.
254
   *
255
   * @param array $array <p>The array to work on</p>
256
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
257
   *                     or <strong>CASE_LOWER</strong> (default)</p>
258
   *
259
   * @return array <p>An array with its keys lower or uppercased.</p>
260
   */
261 1
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Only CASE_UPPER selects upper-casing; CASE_LOWER and every invalid
    // value fall back to lower-casing.
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $originalKey => $item) {
      $newKey = $toUpper
          ? self::strtoupper($originalKey)
          : self::strtolower($originalKey);

      // Keys that collapse to the same spelling overwrite each other (last one wins).
      $converted[$newKey] = $item;
    }

    return $converted;
  }
284
285
  /**
286
   * Convert binary into a string.
287
   *
288
   * @param mixed $bin 1|0
289
   *
290
   * @return string
291
   */
292 1
  public static function binary_to_str($bin): string
  {
    // Without at least a first digit there is nothing to convert.
    if (!isset($bin[0])) {
      return '';
    }

    // Binary digits -> hexadecimal digits; pack() then turns the hex digits into raw bytes.
    $hex = \base_convert($bin, 2, 16);

    // A value of zero is treated as "no content".
    return $hex === '0' ? '' : \pack('H*', $hex);
  }
305
306
  /**
307
   * Returns the UTF-8 Byte Order Mark Character.
308
   *
309
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
310
   *
311
   * @return string UTF-8 Byte Order Mark
312
   */
313 2
  public static function bom(): string
  {
    // EF BB BF: the three bytes of the UTF-8 byte order mark.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
317
318
  /**
319
   * @alias of UTF8::chr_map()
320
   *
321
   * @see   UTF8::chr_map()
322
   *
323
   * @param string|array $callback
324
   * @param string       $str
325
   *
326
   * @return array
327
   */
328 1
  public static function callback($callback, string $str): array
  {
    // Pure alias: chr_map() does the actual work.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
332
333
  /**
334
   * This method will auto-detect your server environment for UTF-8 support.
335
   *
336
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
337
   */
338 19
  public static function checkForSupport()
  {
    // The detection runs only once; the flag set below marks it as done.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $hasIntl = self::intl_loaded();
    self::$SUPPORT['intl'] = $hasIntl;

    // The transliterator id list is only fetched when intl is loaded and
    // provides the function; otherwise it stays an empty list.
    $canListIds = $hasIntl === true && \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListIds ? transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
369
370
  /**
371
   * Generates a UTF-8 encoded character from the given code point.
372
   *
373
   * INFO: opposite to UTF8::ord()
374
   *
375
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
376
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
377
   *
378
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
379
   */
380 10
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // Per-request cache: "<code point><encoding>" => character.
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // A code point passed as a digit string (e.g. "8364") has to become an integer:
    // IntlChar::chr() interprets a string argument as a character, not as a code point number.
    if (\is_string($code_point) === true && \preg_match('/\A\d+\z/', $code_point) === 1) {
      $code_point = (int)$code_point;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      // Manual UTF-8 encoding; code points <= 0x7F were already handled above,
      // so only the 2-, 3- and 4-byte forms are left.
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      if ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }

    }

    // Convert only real results; a failed lookup (null) is returned as null.
    if ($chr !== null && $encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
461
462
  /**
463
   * Applies callback to all characters of a string.
464
   *
465
   * @param string|array $callback <p>The callback function.</p>
466
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
467
   *
468
   * @return array <p>The outcome of callback.</p>
469
   */
470 1
  public static function chr_map($callback, string $str): array
  {
    // Split the string via split() and hand every resulting element to the callback.
    return \array_map($callback, self::split($str));
  }
476
477
  /**
478
   * Generates an array of byte length of each character of a Unicode string.
479
   *
480
   * 1 byte => U+0000  - U+007F
481
   * 2 byte => U+0080  - U+07FF
482
   * 3 byte => U+0800  - U+FFFF
483
   * 4 byte => U+10000 - U+10FFFF
484
   *
485
   * @param string $str <p>The original unicode string.</p>
486
   *
487
   * @return array <p>An array of byte lengths of each character.</p>
488
   */
489 2
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    $characters = self::split($str);

    // Pick the byte counter: with mbstring function overloading active the
    // length is taken through UTF8::strlen() in 8-bit ("CP850") mode,
    // otherwise the native strlen() is used.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteLength = function ($data) {
        return UTF8::strlen($data, 'CP850'); // 8-BIT
      };
    } else {
      $byteLength = function ($data) {
        return \strlen($data);
      };
    }

    return \array_map($byteLength, $characters);
  }
513
514
  /**
515
   * Get a decimal code representation of a specific character.
516
   *
517
   * @param string $char <p>The input character.</p>
518
   *
519
   * @return int
520
   */
521 2
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    // 0xxxxxxx: a single-byte character is its own code point.
    if (($code & 0x80) === 0) {
      return $code;
    }

    // [mask, marker bits, sequence length] for the possible leading bytes:
    // 110xxxxx (2 bytes), 1110xxxx (3 bytes), 11110xxx (4 bytes).
    $leadingBytes = [
        [0xe0, 0xc0, 2],
        [0xf0, 0xe0, 3],
        [0xf8, 0xf0, 4],
    ];

    $length = 1;
    foreach ($leadingBytes as $pattern) {
      list($mask, $marker, $sequenceLength) = $pattern;

      if (($code & $mask) === $marker) {
        $length = $sequenceLength;
        // strip the marker bits, keep the payload bits
        $code &= ~$marker;
        break;
      }
    }

    // 10xxxxxx: every following byte contributes six more bits.
    $offset = 1;
    while ($offset < $length) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
      ++$offset;
    }

    return $code;
  }
552
553
  /**
554
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
555
   *
556
   * @param string $char <p>The input character</p>
557
   * @param string $pfix [optional]
558
   *
559
   * @return string <p>The code point encoded as U+xxxx<p>
560
   */
561 1
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // The HTML entity for NUL is handled like an empty character.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
573
574
  /**
575
   * alias for "UTF8::chr_to_decimal()"
576
   *
577
   * @see UTF8::chr_to_decimal()
578
   *
579
   * @param string $chr
580
   *
581
   * @return int
582
   */
583 1
  public static function chr_to_int(string $chr): int
  {
    // Alias: chr_to_decimal() does the decoding.
    $codePoint = self::chr_to_decimal($chr);

    return $codePoint;
  }
587
588
  /**
589
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
590
   *
591
   * @param string $body     <p>The original string to be split.</p>
592
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
593
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
594
   *
595
   * @return string <p>The chunked string</p>
596
   */
597 1
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    // Let split() cut the text into pieces sized by $chunklen ...
    $chunks = self::split($body, $chunklen);

    // ... and join them with $end between consecutive pieces
    // (no $end is appended after the last piece).
    return \implode($end, $chunks);
  }
601
602
  /**
603
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
604
   *
605
   * @param string $str                           <p>The string to be sanitized.</p>
606
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
607
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
608
   *                                              whitespace.</p>
609
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
610
   *                                              e.g.: "…"
611
   *                                              => "..."</p>
612
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
613
   *                                              combination with
614
   *                                              $normalize_whitespace</p>
615
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
616
   *                                              mark e.g.: "�"</p>
617
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
618
   *                                              characters e.g.: "\0"</p>
619
   *
620
   * @return string <p>Clean UTF-8 encoded string.</p>
621
   */
622 64
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed byte sequences; the other two
    // alternatives match stray bytes, which are dropped because only "$1" is kept.
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($validSequences, '$1', $str);

    // The optional steps run in a fixed order, each one on the result of the previous one.
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
662
663
  /**
664
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
665
   *
666
   * @param string $str <p>The input string.</p>
667
   *
668
   * @return string
669
   */
670 24
  public static function cleanup(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // then strip everything that is not wanted in the final string
    return self::clean(
        $repaired,
        true,  // remove the BOM
        true,  // normalize whitespace chars ...
        false, // do not normalize MS Word chars
        true,  // ... but keep non-breaking spaces
        true,  // remove the diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
696
697
  /**
698
   * Accepts a string or an array of strings and returns an array of Unicode code points.
699
   *
700
   * INFO: opposite to UTF8::string()
701
   *
702
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
703
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
704
   *                                    default, code points will be returned as integers.</p>
705
   *
706
   * @return array <p>The array of code points.</p>
707
   */
708 7
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A plain string is split first; an array is used as given.
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if ($u_style === false) {
      return $codePoints;
    }

    // Optional second pass: integers -> "U+xxxx" notation.
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
734
735
  /**
736
   * Returns count of characters used in a string.
737
   *
738
   * @param string $str       <p>The input string.</p>
739
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
740
   *
741
   * @return array <p>An associative array of Character as keys and
742
   *               their count as values.</p>
743
   */
744 9
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // Split into chunks of length 1, then tally identical entries.
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
748
749
  /**
750
   * Converts an int-value into a UTF-8 character.
751
   *
752
   * @param mixed $int
753
   *
754
   * @return string
755
   */
756 5
  public static function decimal_to_chr($int): string
  {
    // Build the numeric HTML entity for the value and let the entity decoder resolve it.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
760
761
  /**
762
   * Encode a string with a new charset-encoding.
763
   *
764
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
765
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
766
   *
767
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
768
   * @param string $str      <p>The input string</p>
769
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
770
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
771
   *
772
   * @return string
773
   */
774 14
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Nothing to do without content or without a target encoding.
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Not forced and already in the requested encoding: keep the string as it is.
    if ($force === false && $encodingDetected === $encoding) {
      return $str;
    }

    // UTF-8 target: use the repairing converter when forced or when the
    // source is one of the encodings it is used for here.
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    // ISO-8859-1 target: same idea with the dedicated converter.
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Only a failed (non-string) or empty result falls back to the input.
    // A plain truthiness test would also reject the valid result "0".
    if (\is_string($strEncoded) === true && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
847
848
  /**
849
   * Reads entire file into a string.
850
   *
851
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
852
   *
853
   * @link http://php.net/manual/en/function.file-get-contents.php
854
   *
855
   * @param string        $filename         <p>
856
   *                                        Name of the file to read.
857
   *                                        </p>
858
   * @param bool          $use_include_path [optional] <p>
859
   *                                        Prior to PHP 5, this parameter is called
860
   *                                        use_include_path and is a bool.
861
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
862
   *                                        to trigger include path
863
   *                                        search.
864
   *                                        </p>
865
   * @param resource|null $context          [optional] <p>
866
   *                                        A valid context resource created with
867
   *                                        stream_context_create. If you don't need to use a
868
   *                                        custom context, you can skip this parameter by &null;.
869
   *                                        </p>
870
   * @param int|null      $offset           [optional] <p>
871
   *                                        The offset where the reading starts.
872
   *                                        </p>
873
   * @param int|null      $maxLength        [optional] <p>
874
   *                                        Maximum length of data read. The default is to read until end
875
   *                                        of file is reached.
876
   *                                        </p>
877
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
878
   *
879
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
880
   *                                        images or pdf, because they used non default utf-8 chars.</p>
881
   *
882
   * @return string|false <p>The function returns the read data or false on failure.</p>
883
   */
884 6
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failed filter with false; there is no usable
    // file name then, so signal the failure like a failed read.
    if ($filename === false) {
      return false;
    }

    // Without a caller-supplied context, create one that carries the HTTP timeout.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed when the caller provided one.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    // Convert everything except plain binary content: data that looks binary
    // is still converted when it is detected as UTF-16 or UTF-32.
    if (
        $convertToUtf8 === true
        &&
        (
            self::is_binary($data, true) !== true
            ||
            self::is_utf16($data) !== false
            ||
            self::is_utf32($data) !== false
        )
    ) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
934
935
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // "file_get_contents()" returns false on failure; handing that to
    // "string_has_bom(string)" would raise a TypeError under strict_types.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
946
947
  /**
948
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
949
   *
950
   * @param mixed  $var
951
   * @param int    $normalization_form
952
   * @param string $leading_combining
953
   *
954
   * @return mixed
955
   */
956 9
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    // Arrays: every element is filtered recursively.
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: every iterated property is filtered recursively and written back.
    if (\is_object($var)) {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Anything that is not a string is passed through untouched.
    if (!\is_string($var)) {
      return $var;
    }

    if (\strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty placeholder so the isset() check further down still succeeds.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // Normalization produced nothing usable: re-encode the input instead.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1010
1011
  /**
1012
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1013
   *
1014
   * Gets a specific external variable by name and optionally filters it
1015
   *
1016
   * @link  http://php.net/manual/en/function.filter-input.php
1017
   *
1018
   * @param int    $type          <p>
1019
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1020
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1021
   *                              <b>INPUT_ENV</b>.
1022
   *                              </p>
1023
   * @param string $variable_name <p>
1024
   *                              Name of a variable to get.
1025
   *                              </p>
1026
   * @param int    $filter        [optional] <p>
1027
   *                              The ID of the filter to apply. The
1028
   *                              manual page lists the available filters.
1029
   *                              </p>
1030
   * @param mixed  $options       [optional] <p>
1031
   *                              Associative array of options or bitwise disjunction of flags. If filter
1032
   *                              accepts options, flags can be provided in "flags" field of array.
1033
   *                              </p>
1034
   *
1035
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1036
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1037
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1038
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1039
   * @since 5.2.0
1040
   */
1041 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is forwarded only when the caller actually passed a fourth argument.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1051
1052
  /**
1053
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1054
   *
1055
   * Gets external variables and optionally filters them
1056
   *
1057
   * @link  http://php.net/manual/en/function.filter-input-array.php
1058
   *
1059
   * @param int   $type       <p>
1060
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1061
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1062
   *                          <b>INPUT_ENV</b>.
1063
   *                          </p>
1064
   * @param mixed $definition [optional] <p>
1065
   *                          An array defining the arguments. A valid key is a string
1066
   *                          containing a variable name and a valid value is either a filter type, or an array
1067
   *                          optionally specifying the filter, flags and options. If the value is an
1068
   *                          array, valid keys are filter which specifies the
1069
   *                          filter type,
1070
   *                          flags which specifies any flags that apply to the
1071
   *                          filter, and options which specifies any options that
1072
   *                          apply to the filter. See the example below for a better understanding.
1073
   *                          </p>
1074
   *                          <p>
1075
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1076
   *                          input array are filtered by this filter.
1077
   *                          </p>
1078
   * @param bool  $add_empty  [optional] <p>
1079
   *                          Add missing keys as <b>NULL</b> to the return value.
1080
   *                          </p>
1081
   *
1082
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1083
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1084
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1085
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1086
   * fails.
1087
   * @since 5.2.0
1088
   */
1089 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, the native function is called without a definition.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1099
1100
  /**
1101
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1102
   *
1103
   * Filters a variable with a specified filter
1104
   *
1105
   * @link  http://php.net/manual/en/function.filter-var.php
1106
   *
1107
   * @param mixed $variable <p>
1108
   *                        Value to filter.
1109
   *                        </p>
1110
   * @param int   $filter   [optional] <p>
1111
   *                        The ID of the filter to apply. The
1112
   *                        manual page lists the available filters.
1113
   *                        </p>
1114
   * @param mixed $options  [optional] <p>
1115
   *                        Associative array of options or bitwise disjunction of flags. If filter
1116
   *                        accepts options, flags can be provided in "flags" field of array. For
1117
   *                        the "callback" filter, callable type should be passed. The
1118
   *                        callback must accept one argument, the value to be filtered, and return
1119
   *                        the value after filtering/sanitizing it.
1120
   *                        </p>
1121
   *                        <p>
1122
   *                        <code>
1123
   *                        // for filters that accept options, use this format
1124
   *                        $options = array(
1125
   *                        'options' => array(
1126
   *                        'default' => 3, // value to return if the filter fails
1127
   *                        // other options here
1128
   *                        'min_range' => 0
1129
   *                        ),
1130
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1131
   *                        );
1132
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1133
   *                        // for filter that only accept flags, you can pass them directly
1134
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1135
   *                        // for filter that only accept flags, you can also pass as an array
1136
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1137
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1138
   *                        // callback validate filter
1139
   *                        function foo($value)
1140
   *                        {
1141
   *                        // Expected format: Surname, GivenNames
1142
   *                        if (strpos($value, ", ") === false) return false;
1143
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1144
   *                        $empty = (empty($surname) || empty($givennames));
1145
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1146
   *                        if ($empty || $notstrings) {
1147
   *                        return false;
1148
   *                        } else {
1149
   *                        return $value;
1150
   *                        }
1151
   *                        }
1152
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1153
   *                        </code>
1154
   *                        </p>
1155
   *
1156
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1157
   * @since 5.2.0
1158
   */
1159 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is forwarded only when the caller actually passed a third argument.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1169
1170
  /**
1171
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1172
   *
1173
   * Gets multiple variables and optionally filters them
1174
   *
1175
   * @link  http://php.net/manual/en/function.filter-var-array.php
1176
   *
1177
   * @param array $data       <p>
1178
   *                          An array with string keys containing the data to filter.
1179
   *                          </p>
1180
   * @param mixed $definition [optional] <p>
1181
   *                          An array defining the arguments. A valid key is a string
1182
   *                          containing a variable name and a valid value is either a
1183
   *                          filter type, or an
1184
   *                          array optionally specifying the filter, flags and options.
1185
   *                          If the value is an array, valid keys are filter
1186
   *                          which specifies the filter type,
1187
   *                          flags which specifies any flags that apply to the
1188
   *                          filter, and options which specifies any options that
1189
   *                          apply to the filter. See the example below for a better understanding.
1190
   *                          </p>
1191
   *                          <p>
1192
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1193
   *                          input array are filtered by this filter.
1194
   *                          </p>
1195
   * @param bool  $add_empty  [optional] <p>
1196
   *                          Add missing keys as <b>NULL</b> to the return value.
1197
   *                          </p>
1198
   *
1199
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1200
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1201
   * the variable is not set.
1202
   * @since 5.2.0
1203
   */
1204 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, the native function is called without a definition.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1214
1215
  /**
1216
   * Check if the number of unicode characters is not more than the specified integer.
1217
   *
1218
   * @param string $str      The original string to be checked.
1219
   * @param int    $box_size The size in number of chars to be checked against string.
1220
   *
1221
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1222
   */
1223 1
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length as reported by the class' own strlen() implementation.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1227
1228
  /**
1229
   * Try to fix simple broken UTF-8 strings.
1230
   *
1231
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1232
   *
1233
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1234
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1235
   * See: http://en.wikipedia.org/wiki/Windows-1252
1236
   *
1237
   * @param string $str <p>The input string</p>
1238
   *
1239
   * @return string
1240
   */
1241 29 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search/replace lists are derived once from the fix table and then reused.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      // Load the fix table on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $searchCache = \array_keys(self::$BROKEN_UTF8_FIX);
      $replaceCache = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
1262
1263
  /**
1264
   * Fix a double (or multiple) encoded UTF8 string.
1265
   *
1266
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1267
   *
1268
   * @return string|string[] <p>Will return the fixed input-"array" or
1269
   *                         the fixed input-"string".</p>
1270
   */
1271 1
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (keys are preserved).
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    // Decode and re-encode until the string no longer changes.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1291
1292
  /**
   * Get the text direction of a specific character.
   *
   * @param string $char <p>The character to inspect.</p>
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $tmpReturn = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      $charDirection = [
          'RTL' => [1, 13, 14, 15, 21],
          'LTR' => [0, 11, 12, 20],
      ];

      if (\in_array($tmpReturn, $charDirection['LTR'], true)) {
        return 'LTR';
      }

      if (\in_array($tmpReturn, $charDirection['RTL'], true)) {
        return 'RTL';
      }

      // Any other direction code falls through to the code point table below.
    }

    // The class is final, so "self::" is used like everywhere else in this file.
    $c = self::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      if (0x5be === $c ||
          0x5c0 === $c ||
          0x5c3 === $c ||
          0x5c6 === $c ||
          (0x5d0 <= $c && 0x5ea >= $c) ||
          (0x5f0 <= $c && 0x5f4 >= $c) ||
          0x608 === $c ||
          0x60b === $c ||
          0x60d === $c ||
          0x61b === $c ||
          (0x61e <= $c && 0x64a >= $c) ||
          (0x66d <= $c && 0x66f >= $c) ||
          (0x671 <= $c && 0x6d5 >= $c) ||
          (0x6e5 <= $c && 0x6e6 >= $c) ||
          (0x6ee <= $c && 0x6ef >= $c) ||
          (0x6fa <= $c && 0x70d >= $c) ||
          0x710 === $c ||
          (0x712 <= $c && 0x72f >= $c) ||
          (0x74d <= $c && 0x7a5 >= $c) ||
          0x7b1 === $c ||
          (0x7c0 <= $c && 0x7ea >= $c) ||
          (0x7f4 <= $c && 0x7f5 >= $c) ||
          0x7fa === $c ||
          (0x800 <= $c && 0x815 >= $c) ||
          0x81a === $c ||
          0x824 === $c ||
          0x828 === $c ||
          (0x830 <= $c && 0x83e >= $c) ||
          (0x840 <= $c && 0x858 >= $c) ||
          0x85e === $c
      ) {
        return 'RTL';
      }

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      if (0xfb1d === $c ||
          (0xfb1f <= $c && 0xfb28 >= $c) ||
          (0xfb2a <= $c && 0xfb36 >= $c) ||
          (0xfb38 <= $c && 0xfb3c >= $c) ||
          0xfb3e === $c ||
          (0xfb40 <= $c && 0xfb41 >= $c) ||
          (0xfb43 <= $c && 0xfb44 >= $c) ||
          (0xfb46 <= $c && 0xfbc1 >= $c) ||
          (0xfbd3 <= $c && 0xfd3d >= $c) ||
          (0xfd50 <= $c && 0xfd8f >= $c) ||
          (0xfd92 <= $c && 0xfdc7 >= $c) ||
          (0xfdf0 <= $c && 0xfdfc >= $c) ||
          (0xfe70 <= $c && 0xfe74 >= $c) ||
          (0xfe76 <= $c && 0xfefc >= $c) ||
          (0x10800 <= $c && 0x10805 >= $c) ||
          0x10808 === $c ||
          (0x1080a <= $c && 0x10835 >= $c) ||
          (0x10837 <= $c && 0x10838 >= $c) ||
          0x1083c === $c ||
          (0x1083f <= $c && 0x10855 >= $c) ||
          (0x10857 <= $c && 0x1085f >= $c) ||
          (0x10900 <= $c && 0x1091b >= $c) ||
          (0x10920 <= $c && 0x10939 >= $c) ||
          0x1093f === $c ||
          0x10a00 === $c ||
          (0x10a10 <= $c && 0x10a13 >= $c) ||
          (0x10a15 <= $c && 0x10a17 >= $c) ||
          (0x10a19 <= $c && 0x10a33 >= $c) ||
          (0x10a40 <= $c && 0x10a47 >= $c) ||
          (0x10a50 <= $c && 0x10a58 >= $c) ||
          (0x10a60 <= $c && 0x10a7f >= $c) ||
          (0x10b00 <= $c && 0x10b35 >= $c) ||
          (0x10b40 <= $c && 0x10b55 >= $c) ||
          (0x10b58 <= $c && 0x10b72 >= $c) ||
          (0x10b78 <= $c && 0x10b7f >= $c)
      ) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1413
1414
  /**
   * Load a data set from "/data/*.php" (relative to the directory of this class).
   *
   * @param string $file <p>Name of the data file, without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file; will return false
   *                               if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1431
1432
  /**
1433
   * Check for php-support.
1434
   *
1435
   * @param string|null $key
1436
   *
1437
   * @return mixed <p>Return the full support-"array", if $key === null<br>
1438
   *               return bool-value, if $key is used and available<br>
1439
   *               otherwise return null</p>
1440
   */
1441 19
  public static function getSupportInfo(string $key = null)
  {
    // Make sure the support table has been populated before reading from it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key: the whole table; otherwise the single entry, or null when it is not set.
    return $key === null
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
1457
1458
  /**
1459
   * alias for "UTF8::string_has_bom()"
1460
   *
1461
   * @see        UTF8::string_has_bom()
1462
   *
1463
   * @param string $str
1464
   *
1465
   * @return bool
1466
   *
1467
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
1468
   */
1469 1
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: the check itself is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1473
1474
  /**
1475
   * Converts a hexadecimal-value into an UTF-8 character.
1476
   *
1477
   * @param string $hexdec <p>The hexadecimal value.</p>
1478
   *
1479
   * @return string|false <p>One single UTF-8 character.</p>
1480
   */
1481 2
  public static function hex_to_chr(string $hexdec)
  {
    // Hexadecimal string -> decimal code point -> character.
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1485
1486
  /**
1487
   * Converts hexadecimal U+xxxx code point representation to integer.
1488
   *
1489
   * INFO: opposite to UTF8::int_to_hex()
1490
   *
1491
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
1492
   *
1493
   * @return int|false <p>The code point, or false on failure.</p>
1494
   */
1495 1
  public static function hex_to_int(string $hexDec)
  {
    if (!isset($hexDec[0])) {
      return false;
    }

    // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
    // Only [a-f0-9] is accepted: other letters are not hexadecimal and would
    // otherwise be turned into a bogus code point instead of reporting failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1507
1508
  /**
1509
   * alias for "UTF8::html_entity_decode()"
1510
   *
1511
   * @see UTF8::html_entity_decode()
1512
   *
1513
   * @param string $str
1514
   * @param int    $flags
1515
   * @param string $encoding
1516
   *
1517
   * @return string
1518
   */
1519 1
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Alias: all arguments are handed unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1523
1524
  /**
1525
   * Converts a UTF-8 string to a series of HTML numbered entities.
1526
   *
1527
   * INFO: opposite to UTF8::html_decode()
1528
   *
1529
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1530
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1531
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1532
   *
1533
   * @return string <p>HTML numbered entities.</p>
1534
   */
1535 3
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the conversion starts at 0x80, otherwise at 0x00.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A conversion map is a list of 4-tuples (start, end, offset, mask).
      // It must hold exactly a multiple of 4 elements: PHP 8 rejects any
      // other size, while older versions silently ignored a surplus element.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode character by character.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1570
1571
  /**
1572
   * UTF-8 version of html_entity_decode()
1573
   *
1574
   * The reason we are not using html_entity_decode() by itself is because
1575
   * while it is not technically correct to leave out the semicolon
1576
   * at the end of an entity most browsers will still interpret the entity
1577
   * correctly. html_entity_decode() does not convert entities without
1578
   * semicolons, so we are left with our own little solution here. Bummer.
1579
   *
1580
   * Convert all HTML entities to their applicable characters
1581
   *
1582
   * INFO: opposite to UTF8::html_encode()
1583
   *
1584
   * @link http://php.net/manual/en/function.html-entity-decode.php
1585
   *
1586
   * @param string $str      <p>
1587
   *                         The input string.
1588
   *                         </p>
1589
   * @param int    $flags    [optional] <p>
1590
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1591
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1592
   *                         <table>
1593
   *                         Available <i>flags</i> constants
1594
   *                         <tr valign="top">
1595
   *                         <td>Constant Name</td>
1596
   *                         <td>Description</td>
1597
   *                         </tr>
1598
   *                         <tr valign="top">
1599
   *                         <td><b>ENT_COMPAT</b></td>
1600
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1601
   *                         </tr>
1602
   *                         <tr valign="top">
1603
   *                         <td><b>ENT_QUOTES</b></td>
1604
   *                         <td>Will convert both double and single quotes.</td>
1605
   *                         </tr>
1606
   *                         <tr valign="top">
1607
   *                         <td><b>ENT_NOQUOTES</b></td>
1608
   *                         <td>Will leave both double and single quotes unconverted.</td>
1609
   *                         </tr>
1610
   *                         <tr valign="top">
1611
   *                         <td><b>ENT_HTML401</b></td>
1612
   *                         <td>
1613
   *                         Handle code as HTML 4.01.
1614
   *                         </td>
1615
   *                         </tr>
1616
   *                         <tr valign="top">
1617
   *                         <td><b>ENT_XML1</b></td>
1618
   *                         <td>
1619
   *                         Handle code as XML 1.
1620
   *                         </td>
1621
   *                         </tr>
1622
   *                         <tr valign="top">
1623
   *                         <td><b>ENT_XHTML</b></td>
1624
   *                         <td>
1625
   *                         Handle code as XHTML.
1626
   *                         </td>
1627
   *                         </tr>
1628
   *                         <tr valign="top">
1629
   *                         <td><b>ENT_HTML5</b></td>
1630
   *                         <td>
1631
   *                         Handle code as HTML 5.
1632
   *                         </td>
1633
   *                         </tr>
1634
   *                         </table>
1635
   *                         </p>
1636
   * @param string $encoding [optional] <p>Encoding to use.</p>
1637
   *
1638
   * @return string <p>The decoded string.</p>
1639
   */
1640 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // An empty string has nothing to decode.
    if (!isset($str[0])) {
      return '';
    }

    // Inputs shorter than four bytes are returned untouched (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Cheap pre-check: bail out unless there is an ampersand together with
    // either a numeric-entity prefix or a semicolon somewhere in the input.
    $hasAmpersand = \strpos($str, '&') !== false;
    $hasNumericPrefix = \strpos($str, '&#') !== false;
    $hasSemicolon = \strpos($str, ';') !== false;
    if (!$hasAmpersand || (!$hasNumericPrefix && !$hasSemicolon)) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Default flags when the caller passed none.
    $flags = $flags ?? (ENT_QUOTES | ENT_HTML5);

    // Warn when the target encoding would need mbstring but mbstring is flagged as unavailable.
    if (
        \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Converts one decimal entity; entities that decode to a quote character are
    // handed back unchanged so the flag-aware html_entity_decode() call below decides.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // Repeat until a full pass leaves the string unchanged (handles nested encodings).
    do {
      $previous = $str;

      $str = (string)\preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities:
      // first append the missing ";" to unterminated numeric entities, then decode.
      $terminated = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = \html_entity_decode($terminated, $flags, $encoding);
    } while ($previous !== $str);

    return $str;
  }
1710
1711
  /**
1712
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1713
   *
1714
   * @link http://php.net/manual/en/function.htmlentities.php
1715
   *
1716
   * @param string $str           <p>
1717
   *                              The input string.
1718
   *                              </p>
1719
   * @param int    $flags         [optional] <p>
1720
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1721
   *                              invalid code unit sequences and the used document type. The default is
1722
   *                              ENT_COMPAT | ENT_HTML401.
1723
   *                              <table>
1724
   *                              Available <i>flags</i> constants
1725
   *                              <tr valign="top">
1726
   *                              <td>Constant Name</td>
1727
   *                              <td>Description</td>
1728
   *                              </tr>
1729
   *                              <tr valign="top">
1730
   *                              <td><b>ENT_COMPAT</b></td>
1731
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1732
   *                              </tr>
1733
   *                              <tr valign="top">
1734
   *                              <td><b>ENT_QUOTES</b></td>
1735
   *                              <td>Will convert both double and single quotes.</td>
1736
   *                              </tr>
1737
   *                              <tr valign="top">
1738
   *                              <td><b>ENT_NOQUOTES</b></td>
1739
   *                              <td>Will leave both double and single quotes unconverted.</td>
1740
   *                              </tr>
1741
   *                              <tr valign="top">
1742
   *                              <td><b>ENT_IGNORE</b></td>
1743
   *                              <td>
1744
   *                              Silently discard invalid code unit sequences instead of returning
1745
   *                              an empty string. Using this flag is discouraged as it
1746
   *                              may have security implications.
1747
   *                              </td>
1748
   *                              </tr>
1749
   *                              <tr valign="top">
1750
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1751
   *                              <td>
1752
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1753
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1754
   *                              </td>
1755
   *                              </tr>
1756
   *                              <tr valign="top">
1757
   *                              <td><b>ENT_DISALLOWED</b></td>
1758
   *                              <td>
1759
   *                              Replace invalid code points for the given document type with a
1760
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1761
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1762
   *                              instance, to ensure the well-formedness of XML documents with
1763
   *                              embedded external content.
1764
   *                              </td>
1765
   *                              </tr>
1766
   *                              <tr valign="top">
1767
   *                              <td><b>ENT_HTML401</b></td>
1768
   *                              <td>
1769
   *                              Handle code as HTML 4.01.
1770
   *                              </td>
1771
   *                              </tr>
1772
   *                              <tr valign="top">
1773
   *                              <td><b>ENT_XML1</b></td>
1774
   *                              <td>
1775
   *                              Handle code as XML 1.
1776
   *                              </td>
1777
   *                              </tr>
1778
   *                              <tr valign="top">
1779
   *                              <td><b>ENT_XHTML</b></td>
1780
   *                              <td>
1781
   *                              Handle code as XHTML.
1782
   *                              </td>
1783
   *                              </tr>
1784
   *                              <tr valign="top">
1785
   *                              <td><b>ENT_HTML5</b></td>
1786
   *                              <td>
1787
   *                              Handle code as HTML 5.
1788
   *                              </td>
1789
   *                              </tr>
1790
   *                              </table>
1791
   *                              </p>
1792
   * @param string $encoding      [optional] <p>
1793
   *                              Like <b>htmlspecialchars</b>,
1794
   *                              <b>htmlentities</b> takes an optional third argument
1795
   *                              <i>encoding</i> which defines encoding used in
1796
   *                              conversion.
1797
   *                              Although this argument is technically optional, you are highly
1798
   *                              encouraged to specify the correct value for your code.
1799
   *                              </p>
1800
   * @param bool   $double_encode [optional] <p>
1801
   *                              When <i>double_encode</i> is turned off PHP will not
1802
   *                              encode existing html entities. The default is to convert everything.
1803
   *                              </p>
1804
   *
1805
   *
1806
   * @return string the encoded string.
1807
   * </p>
1808
   * <p>
1809
   * If the input <i>string</i> contains an invalid code unit
1810
   * sequence within the given <i>encoding</i> an empty string
1811
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1812
   * <b>ENT_SUBSTITUTE</b> flags are set.
1813
   */
1814 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // Anything other than the two fast-path names goes through the encoding normalizer.
    $needsNormalizing = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
    if ($needsNormalizing) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * The native htmlentities() leaves backslashes untouched, so each backslash
     * is replaced with its numeric HTML entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // Final pass through the class' own encoder.
    return self::html_encode($encoded, true, $encoding);
  }
1834
1835
  /**
1836
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1837
   *
1838
   * INFO: Take a look at "UTF8::htmlentities()"
1839
   *
1840
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1841
   *
1842
   * @param string $str           <p>
1843
   *                              The string being converted.
1844
   *                              </p>
1845
   * @param int    $flags         [optional] <p>
1846
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1847
   *                              invalid code unit sequences and the used document type. The default is
1848
   *                              ENT_COMPAT | ENT_HTML401.
1849
   *                              <table>
1850
   *                              Available <i>flags</i> constants
1851
   *                              <tr valign="top">
1852
   *                              <td>Constant Name</td>
1853
   *                              <td>Description</td>
1854
   *                              </tr>
1855
   *                              <tr valign="top">
1856
   *                              <td><b>ENT_COMPAT</b></td>
1857
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1858
   *                              </tr>
1859
   *                              <tr valign="top">
1860
   *                              <td><b>ENT_QUOTES</b></td>
1861
   *                              <td>Will convert both double and single quotes.</td>
1862
   *                              </tr>
1863
   *                              <tr valign="top">
1864
   *                              <td><b>ENT_NOQUOTES</b></td>
1865
   *                              <td>Will leave both double and single quotes unconverted.</td>
1866
   *                              </tr>
1867
   *                              <tr valign="top">
1868
   *                              <td><b>ENT_IGNORE</b></td>
1869
   *                              <td>
1870
   *                              Silently discard invalid code unit sequences instead of returning
1871
   *                              an empty string. Using this flag is discouraged as it
1872
   *                              may have security implications.
1873
   *                              </td>
1874
   *                              </tr>
1875
   *                              <tr valign="top">
1876
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1877
   *                              <td>
1878
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1879
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1880
   *                              </td>
1881
   *                              </tr>
1882
   *                              <tr valign="top">
1883
   *                              <td><b>ENT_DISALLOWED</b></td>
1884
   *                              <td>
1885
   *                              Replace invalid code points for the given document type with a
1886
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1887
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1888
   *                              instance, to ensure the well-formedness of XML documents with
1889
   *                              embedded external content.
1890
   *                              </td>
1891
   *                              </tr>
1892
   *                              <tr valign="top">
1893
   *                              <td><b>ENT_HTML401</b></td>
1894
   *                              <td>
1895
   *                              Handle code as HTML 4.01.
1896
   *                              </td>
1897
   *                              </tr>
1898
   *                              <tr valign="top">
1899
   *                              <td><b>ENT_XML1</b></td>
1900
   *                              <td>
1901
   *                              Handle code as XML 1.
1902
   *                              </td>
1903
   *                              </tr>
1904
   *                              <tr valign="top">
1905
   *                              <td><b>ENT_XHTML</b></td>
1906
   *                              <td>
1907
   *                              Handle code as XHTML.
1908
   *                              </td>
1909
   *                              </tr>
1910
   *                              <tr valign="top">
1911
   *                              <td><b>ENT_HTML5</b></td>
1912
   *                              <td>
1913
   *                              Handle code as HTML 5.
1914
   *                              </td>
1915
   *                              </tr>
1916
   *                              </table>
1917
   *                              </p>
1918
   * @param string $encoding      [optional] <p>
1919
   *                              Defines encoding used in conversion.
1920
   *                              </p>
1921
   *                              <p>
1922
   *                              For the purposes of this function, the encodings
1923
   *                              ISO-8859-1, ISO-8859-15,
1924
   *                              UTF-8, cp866,
1925
   *                              cp1251, cp1252, and
1926
   *                              KOI8-R are effectively equivalent, provided the
1927
   *                              <i>string</i> itself is valid for the encoding, as
1928
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1929
   *                              the same positions in all of these encodings.
1930
   *                              </p>
1931
   * @param bool   $double_encode [optional] <p>
1932
   *                              When <i>double_encode</i> is turned off PHP will not
1933
   *                              encode existing html entities, the default is to convert everything.
1934
   *                              </p>
1935
   *
1936
   * @return string The converted string.
1937
   * </p>
1938
   * <p>
1939
   * If the input <i>string</i> contains an invalid code unit
1940
   * sequence within the given <i>encoding</i> an empty string
1941
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1942
   * <b>ENT_SUBSTITUTE</b> flags are set.
1943
   */
1944 1 View Code Duplication
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // Normalize the encoding name unless it is one of the two fast-path values.
    $isFastPathEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isFastPathEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
1952
1953
  /**
1954
   * Checks whether iconv is available on the server.
1955
   *
1956
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1957
   */
1958 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so it is returned as-is.
    return \extension_loaded('iconv');
  }
1962
1963
  /**
1964
   * alias for "UTF8::decimal_to_chr()"
1965
   *
1966
   * @see UTF8::decimal_to_chr()
1967
   *
1968
   * @param mixed $int
1969
   *
1970
   * @return string
1971
   */
1972 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
1976
1977
  /**
1978
   * Converts Integer to hexadecimal U+xxxx code point representation.
1979
   *
1980
   * INFO: opposite to UTF8::hex_to_int()
1981
   *
1982
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
1983
   * @param string $pfix [optional]
1984
   *
1985
   * @return string <p>The prefix followed by the lowercase hexadecimal code point, zero-padded to at least four digits.</p>
1986
   */
1987 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad the hex digits with zeros to a minimum width of four;
    // longer values are kept at full length.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
1995
1996
  /**
1997
   * Checks whether intl-char is available on the server.
1998
   *
1999
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2000
   */
2001 1
  public static function intlChar_loaded(): bool
  {
    // Availability is detected through the presence of the IntlChar class.
    $available = \class_exists('IntlChar');

    return $available;
  }
2005
2006
  /**
2007
   * Checks whether intl is available on the server.
2008
   *
2009
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2010
   */
2011 4
  public static function intl_loaded(): bool
  {
    // Availability is detected through the loaded-extension list.
    $available = \extension_loaded('intl');

    return $available;
  }
2015
2016
  /**
2017
   * alias for "UTF8::is_ascii()"
2018
   *
2019
   * @see        UTF8::is_ascii()
2020
   *
2021
   * @param string $str
2022
   *
2023
   * @return boolean
2024
   *
2025
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2026
   */
2027 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2031
2032
  /**
2033
   * alias for "UTF8::is_base64()"
2034
   *
2035
   * @see        UTF8::is_base64()
2036
   *
2037
   * @param string $str
2038
   *
2039
   * @return bool
2040
   *
2041
   * @deprecated <p>use "UTF8::is_base64()"</p>
2042
   */
2043 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2047
2048
  /**
2049
   * alias for "UTF8::is_binary()"
2050
   *
2051
   * @see        UTF8::is_binary()
2052
   *
2053
   * @param mixed $str
2054
   * @param bool  $strict
2055
   *
2056
   * @return bool
2057
   *
2058
   * @deprecated <p>use "UTF8::is_binary()"</p>
2059
   */
2060 2
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias; forwards both arguments to is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
  }
2064
2065
  /**
2066
   * alias for "UTF8::is_bom()"
2067
   *
2068
   * @see        UTF8::is_bom()
2069
   *
2070
   * @param string $utf8_chr
2071
   *
2072
   * @return boolean
2073
   *
2074
   * @deprecated <p>use "UTF8::is_bom()"</p>
2075
   */
2076 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias; forwards to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2080
2081
  /**
2082
   * alias for "UTF8::is_html()"
2083
   *
2084
   * @see        UTF8::is_html()
2085
   *
2086
   * @param string $str
2087
   *
2088
   * @return boolean
2089
   *
2090
   * @deprecated <p>use "UTF8::is_html()"</p>
2091
   */
2092 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_html().
    $result = self::is_html($str);

    return $result;
  }
2096
2097
  /**
2098
   * alias for "UTF8::is_json()"
2099
   *
2100
   * @see        UTF8::is_json()
2101
   *
2102
   * @param string $str
2103
   *
2104
   * @return bool
2105
   *
2106
   * @deprecated <p>use "UTF8::is_json()"</p>
2107
   */
2108
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_json().
    $result = self::is_json($str);

    return $result;
  }
2112
2113
  /**
2114
   * alias for "UTF8::is_utf16()"
2115
   *
2116
   * @see        UTF8::is_utf16()
2117
   *
2118
   * @param string $str
2119
   *
2120
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2121
   *
2122
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2123
   */
2124 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias; forwards to is_utf16() and passes its result through.
    $result = self::is_utf16($str);

    return $result;
  }
2128
2129
  /**
2130
   * alias for "UTF8::is_utf32()"
2131
   *
2132
   * @see        UTF8::is_utf32()
2133
   *
2134
   * @param string $str
2135
   *
2136
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2137
   *
2138
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2139
   */
2140 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias; forwards to is_utf32() and passes its result through.
    $result = self::is_utf32($str);

    return $result;
  }
2144
2145
  /**
2146
   * alias for "UTF8::is_utf8()"
2147
   *
2148
   * @see        UTF8::is_utf8()
2149
   *
2150
   * @param string $str
2151
   * @param bool   $strict
2152
   *
2153
   * @return bool
2154
   *
2155
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2156
   */
2157 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias; forwards both arguments to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2161
2162
  /**
2163
   * Checks if a string is 7 bit ASCII.
2164
   *
2165
   * @param string $str <p>The string to check.</p>
2166
   *
2167
   * @return bool <p>
2168
   *              <strong>true</strong> if it is ASCII<br>
2169
   *              <strong>false</strong> otherwise
2170
   *              </p>
2171
   */
2172 58
  public static function is_ascii(string $str): bool
  {
    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Allowed bytes: \x09, \x10, \x13, \x0A, \x0D and the printable range \x20-\x7E.
    // NOTE(review): \x10 and \x13 may have been meant as decimal 10/13 (LF/CR, which are
    // already listed as \x0A/\x0D) - confirm before changing, callers may rely on it.
    $foundOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOtherByte;
  }
2180
2181
  /**
2182
   * Returns true if the string is base64 encoded, false otherwise.
2183
   *
2184
   * @param string $str <p>The input string.</p>
2185
   *
2186
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2187
   */
2188 1
  public static function is_base64(string $str): bool
  {
    // Strict decoding returns false as soon as a character outside the base64 alphabet is seen.
    $decoded = \base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on truthiness:
    // a payload of "0" (encoded as "MA==") is falsy in PHP but is perfectly valid base64.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Only accept input that is exactly the canonical encoding of what it decodes to.
    return \base64_encode($decoded) === $str;
  }
2194
2195
  /**
2196
   * Check if the input is binary... (this looks like a hack).
2197
   *
2198
   * @param mixed $input
2199
   * @param bool  $strict
2200
   *
2201
   * @return bool
2202
   */
2203 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $bytes = (string)$input;

    // An empty value is never reported as binary.
    if (!isset($bytes[0])) {
      return false;
    }

    // A value consisting only of the characters "0" and "1" is reported as binary.
    if (\preg_match('~^[01]+$~', $bytes)) {
      return true;
    }

    // More than 30% NUL bytes is reported as binary. The length is at least 1 here,
    // so the division is safe.
    $nulCount = \substr_count($bytes, "\x0");
    if (($nulCount / \strlen($bytes)) > 0.3) {
      return true;
    }

    // Strict mode (when finfo exists): the verdict is whatever finfo reports as the encoding.
    if ($strict === true && \class_exists('finfo')) {
      $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($bytes);

      return $detectedEncoding === 'binary';
    }

    // Otherwise any NUL byte at all is enough.
    return $nulCount > 0;
  }
2246
2247
  /**
2248
   * Check if the file is binary.
2249
   *
2250
   * @param string $file
2251
   *
2252
   * @return boolean
2253
   */
2254 3
  public static function is_binary_file($file): bool
  {
    // Fallback when the file cannot be opened or read: an empty block, which is_binary() rejects.
    $block = '';
    $fp = false;

    try {
      $fp = \fopen($file, 'rb');

      // fopen() signals failure by returning false (plus a warning), not by throwing,
      // so the handle has to be checked before it is handed to fread().
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $block = (string)\fread($fp, 512);
      }
    } catch (\Exception $e) {
      // Reached only when an error handler turns PHP warnings into exceptions.
      $block = '';
    } finally {
      // Always release the handle, even when reading failed part-way.
      if (\is_resource($fp)) {
        \fclose($fp);
      }
    }

    return self::is_binary($block, true);
  }
2266
2267
  /**
2268
   * Checks if the given string is equal to any "Byte Order Mark".
2269
   *
2270
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2271
   *
2272
   * @param string $str <p>The input string.</p>
2273
   *
2274
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2275
   */
2276 1
  public static function is_bom($str): bool
  {
    // The known BOM byte sequences are the keys of self::$BOM; the input must be
    // strictly identical to one of them.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2286
2287
  /**
2288
   * Check if the string contains any html-tags <lall>.
2289
   *
2290
   * @param string $str <p>The input string.</p>
2291
   *
2292
   * @return boolean
2293
   */
2294 1
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, optionally carrying attributes.
    $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    // preg_match() yields 1 only on a successful match (0 = no match, false = error).
    return $found === 1;
  }
2307
2308
  /**
2309
   * Try to check if "$str" is an json-string.
2310
   *
2311
   * @param string $str <p>The input string.</p>
2312
   *
2313
   * @return bool
2314
   */
2315 1
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalars and null are rejected.
    if (!\is_object($decoded) && !\is_array($decoded)) {
      return false;
    }

    // ... and the decoder must not have recorded an error.
    return \json_last_error() === JSON_ERROR_NONE;
  }
2331
2332
  /**
2333
   * Check if the string is UTF-16.
2334
   *
2335
   * @param string $str <p>The input string.</p>
2336
   *
2337
   * @return int|false <p>
2338
   *                   <strong>false</strong> if it's not UTF-16,<br>
2339
   *                   <strong>1</strong> for UTF-16LE,<br>
2340
   *                   <strong>2</strong> for UTF-16BE.
2341
   *                   </p>
2342
   */
2343 10 View Code Duplication
  public static function is_utf16(string $str)
  {
    // Input that is_binary() does not flag is rejected up front.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character table of the BOM-stripped input, filled lazily on the first lossless round trip.
    $inputChars = [];

    // One score per byte order; little-endian is evaluated first, then big-endian.
    $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Round trip UTF-8 -> candidate -> UTF-8; only a lossless round trip is scored.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      // One point for every key of the round-tripped table found (strictly) in the input table.
      foreach (self::count_chars($roundTripped, true) as $char => $unused) {
        if (\in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-16"; otherwise the higher score wins.
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
2398
2399
  /**
2400
   * Check if the string is UTF-32.
2401
   *
2402
   * @param string $str
2403
   *
2404
   * @return int|false <p>
2405
   *                   <strong>false</strong> if it's not UTF-32,<br>
2406
   *                   <strong>1</strong> for UTF-32LE,<br>
2407
   *                   <strong>2</strong> for UTF-32BE.
2408
   *                   </p>
2409
   */
2410 8 View Code Duplication
  public static function is_utf32(string $str)
  {
    // Input that is_binary() does not flag is rejected up front.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character table of the BOM-stripped input, filled lazily on the first lossless round trip.
    $inputChars = [];

    // One score per byte order; little-endian is evaluated first, then big-endian.
    $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Round trip UTF-8 -> candidate -> UTF-8; only a lossless round trip is scored.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      // One point for every key of the round-tripped table found (strictly) in the input table.
      foreach (self::count_chars($roundTripped, true) as $char => $unused) {
        if (\in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-32"; otherwise the higher score wins.
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
2465
2466
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * When PCRE was built with UTF-8 support the check is delegated to the regex engine,
   * otherwise a byte-wise state machine (based on the decoder linked below) is used.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked; an array is valid only if every element is.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // an empty input is trivially valid
    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() === true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences.
      //
      // (This branch used to be taken only when /u was NOT supported, where
      // the pattern can never match.)
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    // --- fallback without PCRE UTF-8 support: byte-wise state machine ---

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // lookup table loaded via getData('ord'); presumably maps a single byte to its ordinal value
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;

          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }

            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // a non-zero state here means the input ended in the middle of a
    // multi-octet sequence (e.g. a lone trailing "\xC3"), which is invalid
    return $mState === 0;
  }
2624
2625
  /**
   * Decodes a JSON string after passing it through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, handed to the native function unchanged.
   *                        </p>
   *
   * @return mixed <p>
   *               Whatever the native <i>json_decode()</i> returns for the filtered input:
   *               the decoded value in the appropriate PHP type, or <b>NULL</b> if the
   *               <i>json</i> cannot be decoded or is nested deeper than the recursion limit.
   *               </p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filtered = self::filter($json);

    return \json_decode($filtered, $assoc, $depth, $options);
  }
2669
2670
  /**
   * Returns the JSON representation of a value after passing it through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON_* encode constants (e.g. <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
   *                       handed to the native function unchanged.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // No ": string" return type on purpose: the native function reports failures
    // by returning false, which would be turned into a TypeError by a declared
    // string return type under strict_types.
    return \json_encode(self::filter($value), $options, $depth);
  }
2714
2715
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character; "false" from substr() is treated as "nothing left"
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $loweredHead = self::strtolower($head, $encoding, $cleanUtf8);

    return $loweredHead . ($tail === false ? '' : $tail);
  }
2739
2740
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
2755
2756
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty chunk that is
   * not listed in $exceptions is passed through "UTF8::lcfirst()", and the chunks
   * are glued back together.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are kept as they are.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: the string "0" is falsy in PHP but still real content
    if ($str === '') {
      return '';
    }

    $result = '';

    foreach (self::str_to_words($str, $charlist) as $word) {
      // skip only truly empty chunks, so that a "0" word is not dropped
      if ((string)$word === '') {
        continue;
      }

      // with an empty exception list this is always true, i.e. every word is processed
      if (!\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $result .= $word;
    }

    return $result;
  }
2805
2806
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), everything
   *                      matching the Unicode categories "separator" and "other" is stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP, but it is a perfectly valid character to strip,
    // so it must not fall back to the default whitespace handling.
    $useDefault = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefault) {
      return \preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return \preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
2827
2828
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (joined before the lookup).</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                or an empty string if the input yields no code points.
   *                </p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty array: it throws a ValueError
    // as of PHP 8 and returns false (plus a warning) before that.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
2843
2844
  /**
   * Returns the largest value reported by "UTF8::chr_size_list()" for the string,
   * i.e. the maximum number of bytes taken by any of its characters.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2861
2862
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    $isLoaded = \extension_loaded('mbstring');

    if ($isLoaded) {
      \mb_internal_encoding('UTF-8');
    }

    return $isLoaded;
  }
2877
2878 1
  /**
   * Checks whether the "MB_OVERLOAD_STRING" bit is set in the "mbstring.func_overload" ini setting.
   *
   * @return bool <p><strong>false</strong> if the constant is not defined or the bit is not set.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    // without the constant there is nothing to test the setting against
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2884
2885
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (joined before the lookup).</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                or an empty string if the input yields no code points.
   *                </p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty array: it throws a ValueError
    // as of PHP 8 and returns false (plus a warning) before that.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
2900
2901
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2917
2918
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input => $fallback, "UTF-8" / "UTF8" => "UTF-8", per-request cache,
   * exact match in the list of known encodings, and finally an alias table that is matched
   * against the upper-cased name with every non-alphanumeric character (including whitespace)
   * removed. Names that match nothing are returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);

    // Strip everything that is not a letter or a digit - whitespace included -
    // so that spellings like "ISO 8859-1" or "utf 8" hit the alias table as well.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (isset($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache under the name the caller used, so the next lookup is a plain array hit
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3035
3036
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table is replaced by its value. The table is
   * loaded on first use and its keys / values are kept in static caches afterwards.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    static $searchCache = null;
    static $replaceCache = null;

    if ($str === '') {
      return '';
    }

    if ($searchCache === null) {
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $searchCache = \array_keys(self::$UTF8_MSWORD);
      $replaceCache = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
3064
3065
  /**
   * Normalize the whitespace: every entry of the whitespace table is replaced by a plain space.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars; by default they are removed.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one prepared search list per value of $keepNonBreakingSpace
    static $whitespaceLists = [];
    static $bidiControls = null;

    if ($str === '') {
      return '';
    }

    $listKey = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceLists[$listKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceLists[$listKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // bidi controls are dropped first, then the whitespace is unified
      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($whitespaceLists[$listKey], ' ', $str);
  }
3107
3108
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per "original input + normalized encoding". If the "intlChar"
   * support flag is set, "IntlChar::ord()" is tried first; a falsy result falls through
   * to decoding the leading bytes by hand.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $codeCache = [];

    // the cache key is built from the input as given, not from the converted one
    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $key = $original . $encoding;
    if (isset($codeCache[$key]) === true) {
      return $codeCache[$key];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        $codeCache[$key] = $intlCode;

        return $intlCode;
      }
    }

    // manual decoding of (at most) the first four bytes; unpack() yields a 1-based list
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4 byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3 byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2 byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing usable at all)
      $code = $lead;
    }

    $codeCache[$key] = $code;

    return $code;
  }
3173
3174
  /**
   * Parses the string into an array (written into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope,
   *          the result is always written into $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    if ($parsed === false) {
      return false;
    }

    // a "successful" parse that produced nothing still counts as a failure
    return !empty($result);
  }
3199
3200
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * The probe runs an empty pattern with the modifier against an empty subject; warnings are silenced.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3210
3211
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is resolved to a code point in this order: decimal digits are used as the
   * number itself, hexadecimal digits go through "UTF8::hex_to_int()", anything else
   * through "UTF8::ord()". An empty array is returned as soon as a bound is falsy or
   * resolves to a falsy code point.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    $toCodePoint = static function ($bound) {
      if (\ctype_digit((string)$bound)) {
        return (int)$bound;
      }

      if (\ctype_xdigit($bound)) {
        return (int)self::hex_to_int($bound);
      }

      return self::ord($bound);
    };

    // the start bound is resolved and checked before the end bound is even looked at
    $first = $toCodePoint($var1);
    if (!$first) {
      return [];
    }

    $last = $toCodePoint($var2);
    if (!$last) {
      return [];
    }

    $codePoints = \range($first, $last);

    return \array_map(
        [
            self::class,
            'chr',
        ],
        $codePoints
    );
  }
3257
3258
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string no longer changes).</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal HTML entities, so the loop below can decode them
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapeRx, $str)) {
      $str = (string)\preg_replace($unicodeEscapeRx, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3306
3307
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3322
3323
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the internal BOM table is checked in turn against the start of the (already
   * shortened) string, so more than one leading BOM can be removed in a single call.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bom => $bomLength) {
      // NOTE(review): "CP850" presumably forces byte-wise handling in strpos()/substr() - confirm
      if (self::strpos($str, $bom, 0, 'CP850') !== 0) {
        continue;
      }

      $rest = self::substr($str, $bomLength, null, 'CP850');
      $str = $rest === false ? '' : (string)$rest;
    }

    return $str;
  }
3348
3349
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Consecutive repetitions of each needle are collapsed into a single occurrence,
   * e.g. "a   b" with the needle " " becomes "a b".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // an empty needle has no repetitions to collapse
        if ($item === '') {
          continue;
        }

        // A callback is used instead of a replacement string, because "preg_replace()" would
        // interpret "$0", "$1" or "\1" inside the needle as back-references.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3372
3373
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * The replacement is repeated until nothing matches anymore, so sequences that only appear after
   * a removal (e.g. "%0%000") are removed as well. The $replacement must therefore not contain any
   * of the removed sequences itself, otherwise the loop never terminates.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form ("%00" - "%1f", "%7f") of the characters.</p>
   * @param string $replacement <p>The string that replaces each removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0B" === "%0b"), so the patterns must be as well
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3406
3407
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement; an empty string removes the characters.</p>
   * @param bool   $processInvalidUtf8 <p>Also replace byte sequences that are not valid UTF-8.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" only accepts "none", "long", "entity" or a code point, never
      // an arbitrary string. Invalid bytes are therefore either dropped ("none") or converted
      // into U+FFFD first, which is then replaced by $replacementChar at the end of this method.
      $save = \mb_substitute_character();
      \mb_substitute_character($replacementChar === '' ? 'none' : 0xFFFD);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // always restore the global mbstring setting
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        $replacementChar,
        $str
    );
  }
3456
3457
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars, all trailing characters of the Unicode categories "Z" (separators) and
   * "C" (control / other) are removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>
   *                The string with unwanted characters stripped from the right. If the string can't be
   *                processed by PCRE (e.g. invalid UTF-8), it is returned unchanged.
   *                </p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      $pattern = '/' . self::rxClass($chars) . '+$/u';
    }

    // "preg_replace()" returns null on failure, which would violate the declared return type
    return \preg_replace($pattern, '', $str) ?? $str;
  }
3478
3479
  /**
   * Builds a regex fragment that matches any single element of the given character list.
   *
   * The input is split via "UTF8::str_split()". A "-" is moved to the front of the character class,
   * elements of up to two bytes are escaped via "preg_quote()", longer elements that count as one
   * character (according to "UTF8::strlen()") are appended unescaped, and everything else becomes
   * a separate alternative next to the character class. The result is cached per input.
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>Either "[...]" or "(?:[...]|alternative|...)", without delimiters.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 collects the character class content, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $element) {
      if ('-' === $element) {
        $parts[0] = '-' . $parts[0];

        continue;
      }

      if (!isset($element[2])) {
        $parts[0] .= \preg_quote($element, '/');

        continue;
      }

      if (1 === self::strlen($element)) {
        $parts[0] .= $element;

        continue;
      }

      $parts[] = $element;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $rx = 1 === \count($parts) ? $parts[0] : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $rx;

    return $rx;
  }
3527
3528
  /**
   * WARNING: Prints (echo) the detected native UTF-8 support (libs), e.g. for debugging.
   *
   * The output is a "<pre>" block with one "key - value" line per support entry.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $name => $state) {
      $output .= $name . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
3543
3544
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for an empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are used as they are, every other name is normalized first
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3573
3574
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regex, otherwise a byte-wise fallback decodes
   * 1- to 4-byte UTF-8 sequences; bytes that don't form a valid sequence are skipped by the fallback.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // init
    $chars = [];

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // 1 byte: 0xxxxxxx
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;

          continue;
        }

        // 2 bytes: 110xxxxx 10xxxxxx
        if (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }

          continue;
        }

        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

          continue;
        }

        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // join every $length characters into one element
      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3691
3692
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed detection order and finally an "iconv()" round-trip
   * for every known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // NOTE(review): assumes is_utf16() / is_utf32() are deterministic, so one call each is enough
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the loaded data is not guaranteed to be a list of encodings
    if (\is_array(self::$ENCODINGS) === false) {
      return false;
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure; the strict comparison also rejects that case, and it
      // avoids hashing both strings just to compare them
      if ($roundTrip === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
3810
3811
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is byte-wise and case sensitive; an empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the last bytes of the haystack, as many as the needle has
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3831
3832
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of the haystack is cut by the byte length of the needle and then compared via
   * "UTF8::strcasecmp()"; an empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3852
3853
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value (or array of values) being searched for. Every replacement
   *                       with search array is performed on the result of previous replacement.
   *                       An empty search string never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (or array of values). It is always inserted
   *                       literally, "$1" or "\1" are not treated as back-references.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      // an empty needle is mapped to a pattern that can never match
      $patterns[$key] = $needle === '' ? '/^(?<=.)$/' : '/' . \preg_quote($needle, '/') . '/ui';
    }

    // "preg_replace()" interprets "$n" and "\n" in the replacement as back-references, but this
    // method has to behave like "str_ireplace()", so both characters are escaped.
    $escape = static function ($replacement) {
      return \addcslashes((string)$replacement, '\\$');
    };
    $replacements = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $subject = \preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
3896
3897
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the needle has to be found at the very beginning of the haystack
    $position = self::stripos($haystack, $needle);

    return $position === 0;
  }
3917
3918
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * A string that is not longer than $length is returned unchanged. Otherwise the string is cut to
   * $length characters and the (possibly incomplete) last word is dropped; if nothing is left after
   * that, the string is cut hard at $length - 1 characters. $strAddOn is appended to every
   * shortened string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max. number of characters that are taken from the input string.</p>
   * @param string $strAddOn <p>The string that is appended if the input was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls right onto a space: cut in front of it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // drop everything after the last space
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord === '') {
      // there was no space at all (or only a leading one), so cut hard
      return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3954
3955
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                The padded string. The input is returned unchanged if $pad_length is not greater
   *                than the length of the input or if $pad_string is empty.
   *                </p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $diff = $pad_length - (int)self::strlen($str);

    if ($pad_length <= 0 || $diff <= 0) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with, and the repeat count below would be a division by zero
      return $str;
    }

    // builds a padding of exactly $length characters out of the repeated pad string
    $buildPadding = static function (int $length) use ($pad_string, $ps_length): string {
      if ($length < 1) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($length / $ps_length));

      return (string)self::substr($repeated, 0, $length);
    };

    switch ($pad_type) {
      case STR_PAD_LEFT:
        return $buildPadding($diff) . $str;

      case STR_PAD_BOTH:
        // Integer arithmetic, so no float is used as a length (a TypeError under strict_types if
        // substr() declares an int length - not visible from here).
        // An odd difference puts the extra character on the right side.
        $left = \intdiv($diff, 2);

        return $buildPadding($left) . $str . $buildPadding($diff - $left);

      case STR_PAD_RIGHT:
      default:
        return $str . $buildPadding($diff);
    }
  }
4009
4010
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4034
4035
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4068
4069
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The string to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first match replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstMatch = self::strpos($subject, $search);

    if ($firstMatch === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstMatch, self::strlen($search));
  }
4088
4089
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "UTF8::split()", so multi-byte characters stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $chars = self::split($str);

    \shuffle($chars);

    return \implode('', $chars);
  }
4104
4105
  /**
   * Order the characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, the characters are sorted in descending code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
4130
4131
  /**
   * Split a string into an array of chunks, each "$len" grapheme clusters long.
   *
   * If an array is passed, every entry is split recursively and the
   * array (with its keys) is returned.
   *
   * @param string|string[] $str <p>The input string, or an array of input strings.</p>
   * @param int             $len <p>Number of grapheme clusters per chunk.</p>
   *
   * @return array <p>
   *               The chunks, or an empty array if the input is empty or "$len" is lower than 1.
   *               </p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if (!isset($str[0])) {
      return [];
    }

    if ($len < 1) {
      // The native \str_split() returns false (PHP 7) or throws (PHP 8) for
      // such a length, which can never satisfy the "array" return type.
      return [];
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0] ?? [];

    if ($len === 1) {
      return $graphemes;
    }

    // glue every "$len" consecutive grapheme clusters together
    $chunks = [];
    foreach (\array_chunk($graphemes, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
4179
4180
  /**
   * Tell whether "$haystack" begins with "$needle".
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the two strings is empty.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never counts as a match
    return isset($haystack[0], $needle[0]) && \strpos($haystack, $needle) === 0;
  }
4200
4201
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number and written in base 2
   * without leading zeros (e.g. "a" => "1100001"). The conversion is done
   * digit by digit, so it stays exact for strings of any length; a previous
   * base_convert() based version lost precision for longer inputs.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The binary digits, or "0" if the input is empty or contains only zero bytes.</p>
   */
  public static function str_to_binary(string $str): string
  {
    $unpacked = \unpack('H*', $str);
    $hex = $unpacked === false ? '' : (string)$unpacked[1];

    $bits = '';
    $hexLength = \strlen($hex);
    for ($i = 0; $i < $hexLength; ++$i) {
      // every hex digit maps to exactly four binary digits
      $bits .= \str_pad(\decbin((int)\hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
    }

    // keep the historic format: no leading zeros, but never an empty result
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4214
4215
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the delimiters captured, so without any filter
   * the result alternates between non-word parts (even indexes) and words
   * (odd indexes).
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove parts that are empty after trimming.</p>
   * @param null|int $removeShortValues <p>Remove parts that are not longer than this number of characters.</p>
   *
   * @return array <p>The parts of the string, or an empty array if the string could not be split.</p>
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      // preg_split() failed (e.g. invalid UTF-8 input); returning false here
      // would violate the declared "array" return type
      return [];
    }

    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = [];
    foreach ($parts as $part) {
      if ($removeShortValues !== null && self::strlen($part) <= $removeShortValues) {
        continue;
      }

      if ($removeEmptyValues === true && \trim($part) === '') {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4270
4271
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4286
4287
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (format 1 and 2).</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // UTF8::str_to_words() keeps the delimiters: words sit at the odd indexes
    $parts = self::str_to_words($str, $charlist);
    $total = \count($parts);

    if ($format !== 1 && $format !== 2) {
      return ($total - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: key every word by the character offset it starts at
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $total; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
4330
4331
  /**
   * Compare two strings without regard to case.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    // case-fold both sides, then compare them case-sensitively
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4349
4350
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle <p>Passed through to UTF8::strstr().</p>
   * @param string $encoding      <p>Passed through to UTF8::strstr().</p>
   * @param bool   $cleanUtf8     <p>Passed through to UTF8::strstr().</p>
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4367
4368
  /**
   * Case-sensitive string comparison.
   *
   * Both strings are brought into normalization form "NFD" before the
   * byte-wise comparison. If a string can not be normalized (e.g. because
   * it is not valid UTF-8), the original string is compared instead.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalizer::normalize() returns false on failure; passing that on to
    // \strcmp() would raise a TypeError in strict mode.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
4388
4389
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The string to examine.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start position; forwarded to UTF8::substr().</p>
   * @param int    $length   <p>Length of the examined part; forwarded to UTF8::substr().</p>
   *
   * @return int|null <p>
   *                  Number of characters before the first character from "$charList",
   *                  or <strong>null</strong> if "$charList" or the examined string is empty.
   *                  </p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    if ($offset !== 0 || $length !== null) {
      // only the requested slice of the string is examined
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }

      $str = (string)$slice;
    }

    if ($str === '') {
      return null;
    }

    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (!\preg_match($pattern, $str, $matches)) {
      // no character of the mask occurs: the whole string is the segment
      return self::strlen($str);
    }

    return self::strlen($matches[1]);
  }
4423
4424
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle <p>Passed through to UTF8::stristr().</p>
   * @param string $encoding      <p>Passed through to UTF8::stristr().</p>
   * @param bool   $cleanUtf8     <p>Passed through to UTF8::stristr().</p>
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4441
4442
  /**
   * Build a UTF-8 string out of a list of code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    // every code point is converted by UTF8::chr(), then everything is joined
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
4464
4465
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string starts with one of the known BOMs, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    // the BOM byte sequences are the keys of self::$BOM
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4482
4483
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // optionally clean the input before the tags are removed
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4515
4516
  /**
   * Remove every whitespace character from the string. Tabs and newlines
   * are removed as well.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without whitespace.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
4533
4534
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found or one of the strings is empty.
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the
      // position reported by the search functions
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "grapheme_stripos()" can't handle other encodings than UTF-8
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if (!$useIntl) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
4582
4583
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Only a needle that is really empty (e.g. after cleaning) short-circuits;
    // a plain "!$needle" check would also swallow the valid needle "0".
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: capture everything in front of the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4658
4659
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encodings: the length is the number of bytes
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if (
          $encoding === 'CP850'
          &&
          self::$SUPPORT['mbstring_func_overload'] === false
      ) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      if (
          self::$SUPPORT['mbstring'] === false
          &&
          self::$SUPPORT['iconv'] === false
      ) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      if (
          self::$SUPPORT['iconv'] === true
          &&
          self::$SUPPORT['mbstring'] === false
      ) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the characters matched by the regex
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4764
4765
  /**
   * Get string length in byte.
   *
   * If the "mbstring" function overloading is active, the native strlen()
   * can not be trusted to count bytes, so an 8-bit encoding is forced via
   * mb_strlen() instead.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The number of bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    // same guard as in the sibling methods, so that self::$SUPPORT is
    // set up before it is read, even if this method is called first
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
4782
4783
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // case-fold both sides, then compare them in natural order
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4799
4800
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      // identical strings need no further work
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4818
4819
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // case-fold both sides, then compare their first "$len" characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4836
4837
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // cut both strings down to their first "$len" characters
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
4857
4858
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      String starting from the character found, or false if it is not found
   *                      or one of the parameters is empty.
   *                      </p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $start = \strpos($haystack, $m[0]);

    return \substr($haystack, $start);
  }
4880
4881
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>
   *                          The search offset. If it is not specified, 0 is used.
   *                          A negative offset counts from the end of the string.
   *                          </p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or one of the strings is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: $needle is always a string here, so no integer (code point)
    // needle has to be handled.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 8-bit encoding: a plain byte search is enough
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // Translate "count from the end" into an absolute character offset,
      // so that the returned position is relative to the start of the string.
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position inside the remaining haystack into a
    // character position inside the original haystack
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5020
5021
  /**
   * Returns a portion of the haystack relative to the last occurrence of the needle.
   *
   * Thin wrapper around "\mb_strrchr()": the encoding name is normalized first
   * (unless it is already "UTF-8" or "CP850") and both strings can optionally be
   * passed through self::clean() before the search.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return all of haystack from the beginning
   *                              to the last occurrence of needle.<br>
   *                              <b>false</b> (default): return all of haystack from the last
   *                              occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $isShortcutEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isShortcutEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences in front of the needle would make the
      // "mb_"-functions report a wrong position, so sanitize both inputs
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // "mb_"-function, maybe provided via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5060
5061
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is broken into pieces via self::split() and the pieces are
   * concatenated again in reverse order.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev(string $str): string
  {
    // nothing to reverse
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = \array_reverse($chars);

    return \implode('', $reversed);
  }
5076
5077
  /**
   * Returns a portion of the haystack relative to the last occurrence of the needle, case insensitive.
   *
   * Thin wrapper around "\mb_strrichr()": the encoding name is normalized first
   * (unless it is already "UTF-8" or "CP850") and both strings can optionally be
   * passed through self::clean() before the search.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return all of haystack from the beginning
   *                               to the last occurrence of needle.<br>
   *                               <b>false</b> (default): return all of haystack from the last
   *                               occurrence of needle to the end.
   *                               </p>
   * @param string $encoding       [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $isShortcutEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isShortcutEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences in front of the needle would make the
      // "mb_"-functions report a wrong position, so sanitize both inputs
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5115
5116
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Strategy, in order: "\mb_strripos()" when mbstring is available, "\grapheme_strripos()"
   * when the encoding is UTF-8 and intl is available, otherwise both strings are
   * upper-cased and the search is delegated to self::strrpos().
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so (unlike in self::strrpos())
    // there is no code point (int) variant that would need a conversion here.

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased variants of both strings,
    // the encoding is forwarded so that the case conversion and the search agree on it

    return self::strrpos(
        self::strtoupper($haystack, $encoding),
        self::strtoupper($needle, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
5178
5179
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Strategy, in order: "\mb_strrpos()" when mbstring is available, "\grapheme_strrpos()"
   * when the encoding is UTF-8 and intl is available, otherwise a byte-wise "\strrpos()"
   * whose result is converted into a character position via self::strlen().
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (default) is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative int needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the default is "null": normalize it once, so that no null is handed
    // over to the int "$offset" parameter of the functions used below
    $offset = $offset ?? 0;

    // INFO: $encoding is declared as "string", so the old "bool"-fallback check is not needed anymore
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first "$offset" characters, the offset is added again to the result below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut "$offset" characters from the end, positions are still relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5269
5270
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * When an offset and / or a length is given, only that part of the string
   * (taken via self::substr()) is inspected.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start of the inspected part, handed over to self::substr().</p>
   * @param int|null $length [optional] <p>Length of the inspected part, handed over to self::substr().</p>
   *
   * @return int <p>Length of the matching initial segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    $needsSlice = ($offset !== 0 || $length !== null);
    if ($needsSlice) {
      $slice = self::substr($str, $offset, $length);
      // self::substr() signals an impossible offset with "false"
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the longest run of mask characters anchored at the start of the string
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!\preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5297
5298
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Strategy, in order: "\mb_strstr()" when mbstring is available, "\grapheme_strstr()"
   * when the encoding is UTF-8 and intl is available, otherwise a regular expression
   * that captures everything in front of the first occurrence of the needle.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $isShortcutEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isShortcutEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything in front of the first needle
    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
    \preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];

    return $before_needle
        ? $prefix
        : self::substr($haystack, self::strlen($prefix));
  }
5365
5366
  /**
   * Unicode transformation for case-less matching.
   *
   * The common case-fold replacements are applied first, optionally followed by the
   * full case-folding table, and the result is lower-cased via self::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // per-request caches, filled on first use
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5412
5413
  /**
   * Make a string lowercase.
   *
   * Without a language the work is done by "\mb_strtolower()". With a language and the
   * intl extension the "<lang>-Lower" transliterator is used (falling back to "Any-Lower"
   * with a warning when that id is unknown); without intl a warning is triggered
   * and "\mb_strtolower()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the "mb_"-functions are not tolerant to invalid characters
      $str = self::clean($str);
    }

    $isShortcutEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isShortcutEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    $isKnownTransliterator = \in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true);
    if (!$isKnownTransliterator) {
      \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // generic transliterator as replacement for the unknown language specific one
      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
5465
5466
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and all characters of the unicode
   * category "Mn" are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5478
5479
  /**
   * Make a string uppercase.
   *
   * Without a language the work is done by "\mb_strtoupper()". With a language and the
   * intl extension the "<lang>-Upper" transliterator is used (falling back to "Any-Upper"
   * with a warning when that id is unknown); without intl a warning is triggered
   * and "\mb_strtoupper()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the "mb_"-functions are not tolerant to invalid characters
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // generic transliterator as replacement for the unknown language specific one
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method (and not "strtolower"), otherwise it points to the wrong call
      \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5530
5531
  /**
   * Translate characters or replace sub-strings.
   *
   * - With "$to": "$from" and "$to" are split via self::str_split(), the longer list is
   *   truncated to the length of the shorter one and the resulting pairs are applied
   *   with the native "\strtr()".
   * - Without "$to" and with a string as "$from": every occurrence of "$from" is removed.
   * - Without "$to" and with a non-string "$from": "$from" is handed over to the native
   *   "\strtr()" as replace-pairs.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing would change
    if ($from === $to) {
      return $str;
    }

    // INF is the marker for "no $to given"
    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);
      $fromCount = \count($fromChars);
      $toCount = \count($toChars);

      // both lists need the same number of entries for "array_combine()"
      if ($fromCount < $toCount) {
        $toChars = \array_slice($toChars, 0, $fromCount);
      } elseif ($fromCount > $toCount) {
        $fromChars = \array_slice($fromChars, 0, $toCount);
      }

      $from = \array_combine($fromChars, $toChars);
    }

    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
5576
5577
  /**
   * Return the width of a string.
   *
   * Thin wrapper around "\mb_strwidth()": the encoding name is normalized first
   * (unless it is already "UTF-8" or "CP850") and the string can optionally be
   * passed through self::clean() before it is measured.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    $isShortcutEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isShortcutEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // "mb_"-function, maybe provided via polyfill
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
5601
5602
  /**
5603
   * Get part of a string.
5604
   *
5605
   * @link http://php.net/manual/en/function.mb-substr.php
5606
   *
5607
   * @param string $str       <p>The string being checked.</p>
5608
   * @param int    $offset    <p>The first position used in str.</p>
5609
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
5610
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5611
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5612
   *
5613
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
5614
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
5615
   *                      characters long, <b>FALSE</b> will be returned.</p>
5616
   */
5617 71
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be cut out of an empty string
    if (!isset($str[0])) {
      return '';
    }

    // An explicit length of zero always yields an empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // The character count is only needed if an offset is given or the length is open-ended
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: start exactly at the end with an open-ended length
    // ($length can not be 0 here, so "=== null" is the only remaining "no length" case)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible
    if ($offset && $offset > $str_length) {
      return false;
    }

    // From here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is handled by the native byte-wise function (unless it is overloaded by mbstring)
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the encoding explicitly, otherwise iconv falls back to its ini-configured default
      $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // pure ASCII can be cut byte-wise
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5721
5722
  /**
   * Compares two strings, optionally looking only at a window of the first one.
   *
   * <p>If an offset or a length is given, "$str1" is reduced to that window first and
   * "$str2" is then cut down to the character count of the reduced "$str1" before
   * both are compared.</p>
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>Start position of the window in "$str1".</p>
   * @param int|null $length             [optional] <p>Length of the window in "$str1", null means "no limit".</p>
   * @param bool     $case_insensitivity [optional] <p>If TRUE, the comparison is done via
   *                                     "UTF8::strcasecmp()", otherwise via "UTF8::strcmp()".</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $hasWindow = ($offset !== 0 || $length !== null);

    if ($hasWindow) {
      // reduce the main string to the requested window ("false" counts as an empty string)
      $window = self::substr($str1, $offset, $length);
      $str1 = ($window === false) ? '' : (string)$window;

      // ... and bring the second string down to the same number of characters
      $counterpart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($counterpart === false) ? '' : (string)$counterpart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5766
5767
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If it is not given, the character length of the haystack
   *                           (in the given encoding) is used.
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset, default is UTF-8.</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty.</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // only cut the haystack if a window was requested
    if ($offset || $length !== null) {

      if ($length === null) {
        // measure with the same encoding that is used for cutting below
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would disturb the matching below, so remove them from both strings
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the (quoted) needle via a unicode-aware regex
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5849
5850
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it
   *                does not start with the needle (or the needle is empty).</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not start with it
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5878
5879
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it
   *                does not end with the needle (or the needle is empty).</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
5907
5908
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it
   *                does not start with the needle (or the needle is empty).</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not start with it
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5936
5937
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If positive, the replacing begins at that character offset
   *                                          of the string.
   *                                          <br><br>
   *                                          If negative, the replacing begins at that many characters
   *                                          from the end of the string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If positive, the number of characters to replace.
   *                                          If negative, the number of characters from the end of the string
   *                                          at which to stop replacing. If zero, the replacement is inserted
   *                                          at the given offset. If not given for a single string, everything
   *                                          up to the end of the string is replaced.
   *                                          <br><br>
   *                                          INFO: if "$str" is an array and no length is given, a length of
   *                                          0 is used for every element.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: one entry per input string
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: non-integer entries fall back to 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: null entries become 0, other non-integer entries become the number of input strings
      if (null === $length) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            function ($value) use ($count) {
              if (null === $value) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, element by element
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first replacement
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // pure ASCII input can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // work on arrays of characters instead of bytes
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
6038
6039
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it
   *                does not end with the needle (or the needle is empty).</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if (!isset($needle[0]) || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6067
6068
  /**
   * Returns a case swapped version of the string.
   *
   * <p>Every non-whitespace character that is equal to its upper-case form is lower-cased,
   * every other non-whitespace character is upper-cased.</p>
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped. If the regex engine reports an error
   *                (e.g. for invalid UTF-8 input), the input string is returned unchanged.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the unicode-aware regex below can not work on invalid UTF-8
      $str = self::clean($str);
    }

    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          // already upper-case (or without case) -> swap to lower-case
          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns null on error, which must not leak
    // out of a method that is declared to return a string
    if ($strSwappedCase === null) {
      return $str;
    }

    return $strSwappedCase;
  }
6109
6110
  /**
   * Deprecated camelCase alias, forwards all arguments unchanged to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    return self::to_ascii($str, $subst_chr, $strict);
  }
6127
6128
  /**
   * Deprecated camelCase alias, forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    return self::to_iso8859($str);
  }
6143
6144
  /**
   * Deprecated camelCase alias, forwards the argument unchanged to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    return self::to_latin1($str);
  }
6159
6160
  /**
   * Deprecated camelCase alias, forwards the argument unchanged to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    return self::to_utf8($str);
  }
6175
6176
  /**
   * Convert a string into ASCII.
   *
   * <p>The string is cleaned first; every remaining non-ASCII character is then decoded to its
   * code point and replaced via lookup tables that are loaded on demand (one table per
   * 256 code points).</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache of the lookup tables, indexed by the upper bits ("bank") of the code point
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // HACK for issue from "transliterator_transliterate()"
        //
        // bug is already reported: https://bugs.php.net/bug.php?id=76286
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        // "transliterator_transliterate()" returns false on failure,
        // in that case we keep the string and use the lookup tables below
        $strTransliterated = \transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if ($strTransliterated !== false) {
          $str = $strTransliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset the code point for every character, otherwise a character that can not be
      // decoded below would silently reuse the code point of the previous character
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2 bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3 bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4 bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5 bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6 bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the lookup table for this code point range on demand
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the character within its lookup table
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return \implode('', $chars);
  }
6343
6344
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // "array_map()" with a single array preserves the keys
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    return isset($str[0]) ? self::utf8_decode($str) : '';
  }
6368
6369
  /**
   * Alias that forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    return self::to_iso8859($str);
  }
6382
6383
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively), the keys are kept
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';

    for ($i = 0; $i < $byteCount; $i++) {
      $lead = $str[$i];

      // neither a lead byte (>= 0xC0) nor a continuation byte (0x80-0xBF): it doesn't need conversion
      if ($lead < "\xC0" && ($lead & "\xC0") !== "\x80") {
        $out .= $lead;
        continue;
      }

      // number of continuation bytes a UTF-8 sequence with this lead byte would have;
      // 0 means: can not start a sequence, so the byte is always converted
      if ($lead < "\xC0") {
        $expected = 0; // stray continuation byte
      } elseif ($lead <= "\xDF") {
        $expected = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $expected = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $expected = 3; // looks like 4 bytes UTF8
      } else {
        $expected = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes, a missing byte at the end of the string counts as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $expected; $k++) {
        $next = ($i + $k >= $byteCount) ? "\x00" : $str[$i + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $i += $expected;
      } else {
        // not valid UTF8 - convert the single byte
        $out .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
6491
6492
  /**
   * Converts one raw byte into its UTF-8 representation.
   *
   * The Windows-1252 lookup table wins if it knows the byte, otherwise a
   * generic 2-byte UTF-8 sequence is built from the byte value.
   *
   * INFO: despite its name, "$int" is handled as a one-byte string: it is the
   *       key for the "ord" table and an operand of string bitwise operators.
   *
   * @param string $int <p>A single byte.</p>
   *
   * @return string <p>The converted byte.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Build the two bytes "11xxxxxx 10xxxxxx".
    // ">> 6" replaces "/ 64": the division yields a float for most values and
    // a fractional float used as array key is truncated implicitly, which is
    // deprecated since PHP 8.1.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6525
6526
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for pure 7-bit input (string and chars),
   * but the check for ASCII costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // custom characters: delegate to the one-sided helpers
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    $trimmed = \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);

    // "preg_replace()" returns null on failure (e.g. malformed UTF-8 together
    // with the "u" modifier); returning that would violate the declared string
    // return type, so fall back to the byte-wise native trim instead.
    if ($trimmed === null) {
      return \trim($str);
    }

    return $trimmed;
  }
6552
6553
  /**
   * Uppercases the first character of a string; the remainder is appended unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // sanitize up front, so both "substr" calls below work on the cleaned input
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything behind the first character; a failed "substr" counts as "nothing left"
    $remainder = self::substr($str, 1, null, $encoding);
    $tail = ($remainder === false) ? '' : $remainder;

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
6583
6584
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6599
6600
  /**
   * Uppercase for all words in the string.
   *
   * Pure ASCII input without exceptions and without a charlist is handled by
   * the native "\ucwords()"; everything else is split via "UTF8::str_to_words()"
   * and every word that is not listed in $exceptions goes through "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // check for a really empty string: a truthiness check would also reject "0"
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // The native function is only an option if there is no custom rule at all.
    // Compare against '' explicitly, otherwise a charlist of "0" would be ignored.
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if ($usePhpDefaultFunctions === true && self::is_ascii($str) === true) {
      return \ucwords($str);
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty fragments only: the word "0" is falsy, but it must be kept
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6668
6669
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html entities,
    // so that the entity decoding in the loop below picks them up
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // one pass = to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8;
    // with $multi_decode the pass is repeated until the string stops changing
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      $changed = ($previous !== $str);
    } while ($multi_decode === true && $changed);

    return $str;
  }
6717
6718
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-escapes "%20" - "%FF", the values are the matching
   * characters. Bytes that Windows-1252 leaves undefined (0x81, 0x8D, 0x8F,
   * 0x90, 0x9D) are mapped to an empty string.
   *
   * NOTE(review): "%7F", "%A0" and "%AD" are mapped to an empty string as well,
   *               although Windows-1252 defines DELETE, NO-BREAK SPACE and
   *               SOFT HYPHEN for these bytes - confirm that this is intended.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6954
6955
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is rewritten in place, byte by byte:
   * - a 2-byte sequence becomes one byte if its code point is below 256, otherwise "?"
   * - a 3-byte or 4-byte sequence becomes "?"
   * - every other byte is copied as it is
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars [optional] <p>
   *                              Return the (table-replaced) input instead of the decoded string,
   *                              if the decoded string is not shorter than that input.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the table *keys* are the search strings and the *values* the
    //               replacements - confirm that this direction is intended for a decode.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i = read position, $j = write position ($j never overtakes $i)
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // A lead byte at the very end of the string has no continuation byte:
          // do not read beyond the string, use the placeholder instead.
          if ($i + 1 >= $len) {
            $str[$j] = $noCharFound;
            break;
          }

          $lead = self::$ORD[$str[$i] & "\x1F"];
          $trail = self::$ORD[$str[++$i] & "\x3F"];
          $c = ($lead << 6) | $trail;
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // 4-byte sequence: skip one byte more than for a 3-byte sequence
          ++$i;
          // no break - intentional fall-through
        case "\xE0":
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // cut off the leftover bytes behind the write position
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7036
7037
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" call, the "win1252_to_utf8" table is
   * applied via "str_replace()" (keys as search, values as replacement),
   * but only if the encoded string contains a "\xC2" byte at all.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    // built once per request from the "win1252_to_utf8" table
    static $searchCache = null;
    static $replaceCache = null;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // early exit: the table is only applied if a "\xC2" byte is present
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
7077
7078
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7091
7092
  /**
   * Returns the internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7108
7109
  /**
   * Cuts a string after a maximum number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Appended to the result, if the string was shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1,
   *                the unchanged input if nothing had to be cut off.
   *                </p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // nothing to keep: empty input or a non-positive limit
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // leading whitespace + up to $limit runs of "non-whitespace, trailing whitespace"
    $found = [];
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $found);

    if (!isset($found[0])) {
      return $str;
    }

    $head = $found[0];

    // the match already covers the whole string -> nothing was cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
7140
7141
  /**
   * Wraps a string to a given number of characters
   *
   * The native "\wordwrap()" does the actual work, but on a one-byte-per-element
   * mask of the input (" " for a space, "?" for anything else, "#" for a break),
   * so that elements returned by "UTF8::split()" are counted instead of bytes.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // step 1: collect the elements of the input and build the matching mask
    $mask = '';
    $glyphs = [];

    foreach (\explode($break, $str) as $segmentIndex => $segment) {

      // an existing break between two segments is one element, "#" in the mask
      if ($segmentIndex > 0) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= ($glyph === ' ') ? ' ' : '?';
      }
    }

    // step 2: let the native function decide where the lines end
    $mask = \wordwrap($mask, $width, '#', $cut);

    // step 3: replay the "#" positions of the mask on the real elements
    $wrapped = '';
    $glyphPos = 0;
    $maskPos = -1;
    $hashPos = -1;

    while (($hashPos = self::strpos($mask, '#', $hashPos + 1)) !== false) {

      // copy everything in front of the current "#"
      ++$maskPos;
      while ($maskPos < $hashPos) {
        $wrapped .= $glyphs[$glyphPos];
        unset($glyphs[$glyphPos]);
        ++$glyphPos;
        ++$maskPos;
      }

      // the "#" stands for an element (old break or space) -> drop that element
      if ($glyphs[$glyphPos] === $break || $glyphs[$glyphPos] === ' ') {
        unset($glyphs[$glyphPos]);
        ++$glyphPos;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $glyphs);
  }
7205
7206
  /**
   * Returns the internal list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7215
7216
}
7217