Passed
Push — master ( 7eed2c...0ce138 )
by Lars
03:23
created

UTF8::max()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 7
nc 4
nop 1
dl 0
loc 14
ccs 8
cts 8
cp 1
crap 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 17
  public function __construct()
  {
    // Instantiating the helper is enough to make sure the environment
    // detection has been performed; the check itself is a no-op after
    // its first run.
    self::checkForSupport();
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219
   * @return string Single Multi-Byte character.
220
   */
221 2
  public static function access(string $str, int $pos): string
  {
    // Neither an empty string nor a negative position can address a character.
    if ($str === '' || $pos < 0) {
      return '';
    }

    // substr() may signal failure with a non-string value; the cast
    // turns that into an empty string.
    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string The output string that contains BOM.
242
   */
243 1
  public static function add_bom_to_string(string $str): string
  {
    // A string that already carries a BOM is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
253
   * Adds the specified amount of left and right padding to the given string.
254
   * The default character used is a space.
255
   *
256
   * @param string $str
257
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
258
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
259
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
260
   * @param string $encoding [optional] <p>Default: UTF-8</p>
261
   *
262
   * @return string String with padding applied.
263
   */
264 25
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // NOTE: $encoding now has a default. A required parameter after optional
    // ones silently makes every preceding parameter required and is
    // deprecated since PHP 8.0; existing callers that pass all five
    // arguments are unaffected.

    $strlen = self::strlen($str, $encoding);

    // The target length is the same in every case, because a side that is
    // not padded contributes 0.
    $length = $left + $right + $strlen;

    // Only the padding direction depends on which sides were requested.
    // "Both" is also used when no padding was requested at all.
    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
286
   * Changes all keys in an array.
287
   *
288
   * @param array $array <p>The array to work on</p>
289
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
290
   *                     or <strong>CASE_LOWER</strong> (default)</p>
291
   *
292
   * @return array An array with its keys lower- or upper-cased (the values are left untouched).
293
   */
294 1
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Every value other than CASE_UPPER (including unknown flags) selects
    // lower-casing.
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $key => $value) {
      $newKey = $toUpper ? self::strtoupper($key) : self::strtolower($key);

      // Keys that collapse to the same spelling overwrite each other;
      // the last one wins.
      $result[$newKey] = $value;
    }

    return $result;
  }
317
318
  /**
319
   * Returns the substring between $start and $end, if found, or an empty
320
   * string. An optional offset may be supplied from which to begin the
321
   * search for the start string.
322
   *
323
   * @param string $str
324
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
325
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
326
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
327
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
328
   *
329
   * @return string
330
   */
331 16
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if (false === $startPos) {
      return '';
    }

    // First index after the start delimiter.
    $from = $startPos + self::strlen($start, $encoding);
    $endPos = self::strpos($str, $end, $from, $encoding);

    // Either the end delimiter is missing, or it follows the start
    // delimiter directly and there is nothing in between.
    if (false === $endPos || $endPos === $from) {
      return '';
    }

    $between = self::substr($str, $from, $endPos - $from, $encoding);

    return false === $between ? '' : $between;
  }
356
357
  /**
358
   * Convert binary into a string.
359
   *
360
   * @param mixed $bin 1|0
361
   *
362
   * @return string
363
   */
364 1
  public static function binary_to_str($bin): string
  {
    // Only non-empty strings can carry binary digits. Everything else
    // (ints, null, arrays, ...) yields an empty result instead of reaching
    // the string functions below.
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // Characters other than "0" and "1" are ignored, and leading zeros carry
    // no value; both match what base_convert() did for this input.
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // base_convert() goes through a float internally and silently corrupts
    // inputs longer than ~53 bits, so translate nibble by nibble instead.
    // Left-pad to a whole number of nibbles first.
    $bitCount = \strlen($bits);
    $bits = \str_pad($bits, $bitCount + ((4 - $bitCount % 4) % 4), '0', STR_PAD_LEFT);

    $hex = '';
    foreach (\str_split($bits, 4) as $nibble) {
      $hex .= \dechex((int)\bindec($nibble));
    }

    // As before, an odd number of hex digits is completed by pack() with a
    // trailing zero nibble.
    return \pack('H*', $hex);
  }
377
378
  /**
379
   * Returns the UTF-8 Byte Order Mark Character.
380
   *
381
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
382
   *
383
   * @return string UTF-8 Byte Order Mark
384
   */
385 2
  public static function bom(): string
  {
    // The three bytes EF BB BF form the UTF-8 byte order mark.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
389
390
  /**
391
   * @alias of UTF8::chr_map()
392
   *
393
   * @see   UTF8::chr_map()
394
   *
395
   * @param string|array $callback
396
   * @param string       $str
397
   *
398
   * @return string[]
399
   */
400 1
  public static function callback($callback, string $str): array
  {
    // Pure alias: all work is delegated to chr_map().
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
404
405
  /**
406
   * Returns the character at $index, with indexes starting at 0.
407
   *
408
   * @param string $str
409
   * @param int    $index    <p>Position of the character.</p>
410
   * @param string $encoding [optional] <p>Default is UTF-8</p>
411
   *
412
   * @return string The character at $index.
413
   */
414 9
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // A single-character slice; the cast turns a failed substr() into ''.
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
418
419
  /**
420
   * Returns an array consisting of the characters in the string.
421
   *
422
   * @param string $str <p>The input string.</p>
423
   *
424
   * @return string[] An array of chars.
425
   */
426 3
  public static function chars(string $str): array
  {
    // Splitting with a chunk length of 1 yields one array entry per character.
    $characters = self::str_split($str, 1);

    return $characters;
  }
430
431
  /**
432
   * This method will auto-detect your server environment for UTF-8 support.
433
   *
434
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
435
   */
436 20
  public static function checkForSupport()
  {
    // The detection only has to run once; later calls return immediately.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring -> http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv -> http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl -> http://php.net/manual/en/book.intl.php
    // The transliterator id list starts out empty and is filled below
    // when the extension offers it.
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    $hasTransliteratorList = self::$SUPPORT['intl'] === true
                             && \function_exists('transliterator_list_ids') === true;
    if ($hasTransliteratorList) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // IntlChar -> http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype -> http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo -> http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json -> http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // PCRE with UTF-8 support -> http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
480
481
  /**
482
   * Generates a UTF-8 encoded character from the given code point.
483
   *
484
   * INFO: opposite to UTF8::ord()
485
   *
486
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
487
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
488
   *
489
   * @return string|null Multi-Byte character, returns null on failure or empty input.
490
   */
491 10
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // Memo of already generated characters, keyed by code point + encoding.
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Numeric strings such as "8364" are documented input. Normalize them to
    // an int up front: IntlChar::chr() treats a string argument as a
    // *character*, not as a number, so "8364" used to yield null on systems
    // with intl while the fallback path (which casts) returned the character.
    if (!\is_int($code_point) && \is_numeric($code_point)) {
      $code_point = (int)$code_point;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      // The lookup table is loaded lazily on first use.
      if (self::$CHR === null) {
        $chrTmp = self::getData('chr');
        if ($chrTmp) {
          self::$CHR = (array)$chrTmp;
        }
      }

      $chr = self::$CHR[$code_point];

      if ($encoding !== 'UTF-8') {
        // always fallback via symfony polyfill
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);

      // IntlChar::chr() reports failure with null; converting null would
      // raise a TypeError under strict types, so pass the failure through.
      if ($chr !== null && $encoding !== 'UTF-8') {
        // always fallback via symfony polyfill
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$CHR === null) {
      $chrTmp = self::getData('chr');
      if ($chrTmp) {
        self::$CHR = (array)$chrTmp;
      }
    }

    $code_point = (int)$code_point;

    // Nothing above U+10FFFF can be encoded; report failure the same way
    // the IntlChar path does instead of indexing outside the byte table.
    if ($code_point > 0x10FFFF) {
      return null;
    }

    // Manual UTF-8 encoding: pick the sequence length from the code point
    // range and assemble lead and continuation bytes from the byte table.
    if ($code_point <= 0x7F) {
      $chr = self::$CHR[$code_point];
    } elseif ($code_point <= 0x7FF) {
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
      // always fallback via symfony polyfill
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
583
584
  /**
585
   * Applies callback to all characters of a string.
586
   *
587
   * @param string|array $callback <p>The callback function.</p>
588
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
589
   *
590
   * @return string[] The outcome of callback.
591
   */
592 1
  public static function chr_map($callback, string $str): array
  {
    // Break the string into characters and run the callback on each of them.
    return \array_map($callback, self::split($str));
  }
598
599
  /**
600
   * Generates an array of byte length of each character of a Unicode string.
601
   *
602
   * 1 byte => U+0000  - U+007F
603
   * 2 byte => U+0080  - U+07FF
604
   * 3 byte => U+0800  - U+FFFF
605
   * 4 byte => U+10000 - U+10FFFF
606
   *
607
   * @param string $str <p>The original unicode string.</p>
608
   *
609
   * @return int[] An array of byte lengths of each character.
610
   */
611 2
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    // Make sure the environment flags read below exist, using the same lazy
    // guard as chr() and encode(); otherwise the lookup can hit an undefined
    // array key when nothing has triggered the detection yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    // With mbstring function overloading active, the plain \strlen() is not
    // used; the byte-oriented helper is called instead.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \array_map(
          function ($data) {
            return UTF8::strlen_in_byte($data);
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
630
631
  /**
632
   * Get a decimal code representation of a specific character.
633
   *
634
   * @param string $char <p>The input character.</p>
635
   *
636
   * @return int
637
   */
638 2
  public static function chr_to_decimal(string $char): int
  {
    // An empty string has no lead byte; return 0 instead of reading the
    // uninitialized offset $char[0].
    if ('' === $char) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // The lead byte tells how long the sequence is; strip its marker bits
    // so that only the payload bits remain.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // A truncated sequence ends the decoding early instead of reading
      // past the end of the string.
      if (!isset($char[$i])) {
        break;
      }

      // 10xxxxxx: append the 6 payload bits of the continuation byte.
      $code = ($code << 6) + (self::ord($char[$i]) & ~0x80);
    }

    return $code;
  }
669
670
  /**
671
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
672
   *
673
   * @param string $char <p>The input character</p>
674
   * @param string $pfix [optional]
675
   *
676
   * @return string The code point encoded as U+xxxx
677
   */
678 1
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // The HTML entity for NUL is handled like an empty character.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
690
691
  /**
692
   * alias for "UTF8::chr_to_decimal()"
693
   *
694
   * @see UTF8::chr_to_decimal()
695
   *
696
   * @param string $chr
697
   *
698
   * @return int
699
   */
700 1
  public static function chr_to_int(string $chr): int
  {
    // Pure alias: all work is delegated to chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
704
705
  /**
706
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
707
   *
708
   * @param string $body     <p>The original string to be split.</p>
709
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
710
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
711
   *
712
   * @return string The chunked string.
713
   */
714 2
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    // Cut the text into chunks of $chunklen characters ...
    $chunks = self::split($body, $chunklen);

    // ... and glue them together; the separator only appears *between*
    // chunks, not after the last one.
    return \implode($end, $chunks);
  }
718
719
  /**
720
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
721
   *
722
   * @param string $str                           <p>The string to be sanitized.</p>
723
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
724
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
725
   *                                              whitespace.</p>
726
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
727
   *                                              e.g.: "…"
728
   *                                              => "..."</p>
729
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
730
   *                                              combination with
731
   *                                              $normalize_whitespace</p>
732
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
733
   *                                              mark e.g.: "�"</p>
734
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
735
   *                                              characters e.g.: "\0"</p>
736
   *
737
   * @return string Clean UTF-8 encoded string.
738
   */
739 80
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences, which are kept; the
    // stray bytes matched by the other two alternatives are dropped because
    // the replacement only re-inserts group 1.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    // The optional steps run in a fixed order; each one works on the
    // result of the previous one.
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
787
788
  /**
789
   * Clean up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
790
   *
791
   * @param string $str <p>The input string.</p>
792
   *
793
   * @return string
794
   */
795 25
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Repair simple ISO <-> UTF-8 mix-ups before anything gets stripped.
    $fixed = self::fix_simple_utf8($str);

    // Then drop everything that is not valid UTF-8 and apply the extras.
    return self::clean(
        $fixed,
        true,  // remove the BOM
        true,  // normalize whitespace characters ...
        false, // (MS Word characters are left alone)
        true,  // ... but keep non-breaking spaces
        true,  // remove the diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
821
822
  /**
823
   * Accepts a string or a array of strings and returns an array of Unicode code points.
824
   *
825
   * INFO: opposite to UTF8::string()
826
   *
827
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
828
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
829
   *                                    default, code points will be returned as integers.</p>
830
   *
831
   * @return array<int|string>
832
   *                           The array of code points:<br>
833
   *                           array<int> for $u_style === false<br>
834
   *                           array<string> for $u_style === true<br>
835
   */
836 7
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A plain string is first broken up into its characters; an array is
    // taken as the list of characters/strings as-is.
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);
    if ($codePoints === []) {
      return [];
    }

    // Integer code points are the default output format.
    if (!$u_style) {
      return $codePoints;
    }

    // Otherwise convert every code point into its "U+xxxx" notation.
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
866
867
  /**
868
   * Trims the string and replaces consecutive whitespace characters with a
869
   * single space. This includes tabs and newline characters, as well as
870
   * multibyte whitespace such as the thin space and ideographic space.
871
   *
872
   * @param string $str <p>The input string.</p>
873
   *
874
   * @return string String with a trimmed $str and condensed whitespace.
875
   */
876 13
  public static function collapse_whitespace(string $str): string
  {
    // Squash every run of whitespace into one ordinary space ...
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    // ... and strip what is left over at both ends.
    return self::trim($condensed);
  }
882
883
  /**
884
   * Returns count of characters used in a string.
885
   *
886
   * @param string $str       <p>The input string.</p>
887
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
888
   *
889
   * @return int[] An associative array of Character as keys and
890
   *               their count as values.
891
   */
892 9
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // One array entry per character ...
    $chars = self::split($str, 1, $cleanUtf8);

    // ... tallied into a "character => occurrences" map.
    return \array_count_values($chars);
  }
896
897
  /**
898
   * Remove css media-queries.
899
   *
900
   * @param string $str
901
   *
902
   * @return string
903
   */
904 1
  public static function css_stripe_media_queries(string $str): string
  {
    // Matches a whole "@media ... { ... }" block including its nested rules.
    $pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($pattern, '', $str);

    // preg_replace() yields null on a PCRE error; the cast maps that to ''.
    return (string)$stripped;
  }
912
913
  /**
914
   * Checks whether ctype is available on the server.
915
   *
916
   * @return bool
917
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
918
   */
919
  public static function ctype_loaded(): bool
  {
    // Ask the engine directly whether the "ctype" extension is loaded.
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
923
924
  /**
925
   * Converts an int value into a UTF-8 character.
926
   *
927
   * @param mixed $int
928
   *
929
   * @return string
930
   */
931 5
  public static function decimal_to_chr($int): string
  {
    // Build the numeric character reference (e.g. "&#8364;") and let the
    // entity decoder turn it into the actual character.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
935
936
  /**
937
   * Encode a string with a new charset-encoding.
938
   *
939
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
940
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
941
   *
942
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
943
   * @param string $str      <p>The input string</p>
944
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
945
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
946
   *
947
   * @return string
948
   */
949 14
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // nothing to convert
    if ($str === '' || $encoding === '') {
      return $str;
    }

    // these two names are used as-is, everything else gets normalized first
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Without $force we only convert if an encoding was detected
    // and it differs from the requested one.
    $needsConversion = $force === true
                       || ($encodingDetected !== false && $encodingDetected !== $encoding);
    if ($needsConversion === false) {
      return $str;
    }

    // The dedicated UTF-8 / ISO-8859-1 converters are only used when forced
    // or when the detected source encoding is one of these three.
    $useDedicatedConverter = $force === true
                             || \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($useDedicatedConverter === true) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    $isOtherTargetEncoding = $encoding !== 'UTF-8'
                             && $encoding !== 'ISO-8859-1'
                             && $encoding !== 'WINDOWS-1252';
    if ($isOtherTargetEncoding && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // With $force the source encoding is taken to be the target encoding,
    // otherwise the detected encoding is used (it cannot be false here).
    $fromEncoding = $force === true ? $encoding : $encodingDetected;

    // always fallback via symfony polyfill
    $converted = \mb_convert_encoding($str, $encoding, $fromEncoding);

    // a falsy conversion result falls back to the untouched input
    return $converted ? $converted : $str;
  }
1030
1031
  /**
1032
   * Create an extract from a sentence; if the search-string is found, it tries to center it in the output.
1033
   *
1034
   * @param string   $str                    <p>The input string.</p>
1035
   * @param string   $search                 <p>The searched string.</p>
1036
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1037
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1038
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1039
   *
1040
   * @return string
1041
   */
1042 1
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // characters that are stripped from the cut edges of the extract
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // default length: half of the input
    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // Without a search-string the extract is taken from the beginning of the text.
    // A strict comparison is used on purpose: empty() would also treat the
    // search-string "0" as "no search".
    if ('' === $search) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      // cut position: min() of the next space / dot at or after $end
      // NOTE(review): if one of the two lookups fails, min() presumably yields
      // false and the whole string is returned below - confirm this is intended.
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    // position of the search-string and the offset at which a window of
    // $length chars would be centered around it
    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // move the start of the extract back to the last space / dot before $halfSide
    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordPos && $halfSide > 0) {

      // the extract starts somewhere inside the text
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // no usable end position: take everything from $pos_start up to the end;
        // the length is measured in $encoding, like every other call in this method
        $strSub = self::substr($str, $pos_start, $realLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        // text is skipped on both sides of the extract
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      // the extract starts at the beginning of the text
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1152
1153
  /**
1154
   * Reads entire file into a string.
1155
   *
1156
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1157
   *
1158
   * @link http://php.net/manual/en/function.file-get-contents.php
1159
   *
1160
   * @param string        $filename         <p>
1161
   *                                        Name of the file to read.
1162
   *                                        </p>
1163
   * @param bool          $use_include_path [optional] <p>
1164
   *                                        Prior to PHP 5, this parameter is called
1165
   *                                        use_include_path and is a bool.
1166
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1167
   *                                        to trigger include path
1168
   *                                        search.
1169
   *                                        </p>
1170
   * @param resource|null $context          [optional] <p>
1171
   *                                        A valid context resource created with
1172
   *                                        stream_context_create. If you don't need to use a
1173
   *                                        custom context, you can skip this parameter by &null;.
1174
   *                                        </p>
1175
   * @param int|null      $offset           [optional] <p>
1176
   *                                        The offset where the reading starts.
1177
   *                                        </p>
1178
   * @param int|null      $maxLength        [optional] <p>
1179
   *                                        Maximum length of data read. The default is to read until end
1180
   *                                        of file is reached.
1181
   *                                        </p>
1182
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1183
   *
1184
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1185
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1186
   *
1187
   * @return string|false The function returns the read data or false on failure.
1188
   */
1189 6
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);
    if ($filename === false) {
      // The filter failed: report it as a read failure instead of handing a
      // non-string to \file_get_contents() (a TypeError with strict_types).
      return false;
    }

    // apply the timeout via a default stream-context, unless the caller supplied one
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // pass the length argument only if one was given
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        // auto-detect the current encoding ($force = false), then clean up the result
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1244
1245
  /**
1246
   * Checks if a file starts with BOM (Byte Order Mark) character.
1247
   *
1248
   * @param string $file_path <p>Path to a valid file.</p>
1249
   *
1250
   * @throws \RuntimeException if file_get_contents() returned false
1251
   *
1252
   * @return bool
1253
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
1254
   */
1255 1
  public static function file_has_bom(string $file_path): bool
  {
    $rawContent = \file_get_contents($file_path);

    // readable file: let the string-based check decide
    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1264
1265
  /**
1266
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1267
   *
1268
   * @param mixed  $var
1269
   * @param int    $normalization_form
1270
   * @param string $leading_combining
1271
   *
1272
   * @return mixed
1273
   */
1274 35
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // arrays: filter every element recursively
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // objects: filter every iterable property recursively
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // every other non-string value is passed through untouched
    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the isset($n[0]) check below succeeds
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // use the normalized string if there is one, otherwise re-encode the input as UTF-8
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1328
1329
  /**
1330
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1331
   *
1332
   * Gets a specific external variable by name and optionally filters it
1333
   *
1334
   * @link  http://php.net/manual/en/function.filter-input.php
1335
   *
1336
   * @param int    $type          <p>
1337
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1338
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1339
   *                              <b>INPUT_ENV</b>.
1340
   *                              </p>
1341
   * @param string $variable_name <p>
1342
   *                              Name of a variable to get.
1343
   *                              </p>
1344
   * @param int    $filter        [optional] <p>
1345
   *                              The ID of the filter to apply. The
1346
   *                              manual page lists the available filters.
1347
   *                              </p>
1348
   * @param mixed  $options       [optional] <p>
1349
   *                              Associative array of options or bitwise disjunction of flags. If filter
1350
   *                              accepts options, flags can be provided in "flags" field of array.
1351
   *                              </p>
1352
   *
1353
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1354
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1355
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1356
   */
1357
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really passed a 4th argument
    $var = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1367
1368
  /**
1369
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1370
   *
1371
   * Gets external variables and optionally filters them
1372
   *
1373
   * @link  http://php.net/manual/en/function.filter-input-array.php
1374
   *
1375
   * @param int   $type       <p>
1376
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1377
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1378
   *                          <b>INPUT_ENV</b>.
1379
   *                          </p>
1380
   * @param mixed $definition [optional] <p>
1381
   *                          An array defining the arguments. A valid key is a string
1382
   *                          containing a variable name and a valid value is either a filter type, or an array
1383
   *                          optionally specifying the filter, flags and options. If the value is an
1384
   *                          array, valid keys are filter which specifies the
1385
   *                          filter type,
1386
   *                          flags which specifies any flags that apply to the
1387
   *                          filter, and options which specifies any options that
1388
   *                          apply to the filter. See the example below for a better understanding.
1389
   *                          </p>
1390
   *                          <p>
1391
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1392
   *                          input array are filtered by this filter.
1393
   *                          </p>
1394
   * @param bool  $add_empty  [optional] <p>
1395
   *                          Add missing keys as <b>NULL</b> to the return value.
1396
   *                          </p>
1397
   *
1398
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1399
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1400
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1401
   *               set and <b>NULL</b> if the filter fails.
1402
   */
1403
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // with only $type given, the native function is called with its own defaults
    $a = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($a);
  }
1413
1414
  /**
1415
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1416
   *
1417
   * Filters a variable with a specified filter
1418
   *
1419
   * @link  http://php.net/manual/en/function.filter-var.php
1420
   *
1421
   * @param mixed $variable <p>
1422
   *                        Value to filter.
1423
   *                        </p>
1424
   * @param int   $filter   [optional] <p>
1425
   *                        The ID of the filter to apply. The
1426
   *                        manual page lists the available filters.
1427
   *                        </p>
1428
   * @param mixed $options  [optional] <p>
1429
   *                        Associative array of options or bitwise disjunction of flags. If filter
1430
   *                        accepts options, flags can be provided in "flags" field of array. For
1431
   *                        the "callback" filter, callable type should be passed. The
1432
   *                        callback must accept one argument, the value to be filtered, and return
1433
   *                        the value after filtering/sanitizing it.
1434
   *                        </p>
1435
   *                        <p>
1436
   *                        <code>
1437
   *                        // for filters that accept options, use this format
1438
   *                        $options = array(
1439
   *                        'options' => array(
1440
   *                        'default' => 3, // value to return if the filter fails
1441
   *                        // other options here
1442
   *                        'min_range' => 0
1443
   *                        ),
1444
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1445
   *                        );
1446
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1447
   *                        // for filter that only accept flags, you can pass them directly
1448
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1449
   *                        // for filter that only accept flags, you can also pass as an array
1450
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1451
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1452
   *                        // callback validate filter
1453
   *                        function foo($value)
1454
   *                        {
1455
   *                        // Expected format: Surname, GivenNames
1456
   *                        if (strpos($value, ", ") === false) return false;
1457
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1458
   *                        $empty = (empty($surname) || empty($givennames));
1459
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1460
   *                        if ($empty || $notstrings) {
1461
   *                        return false;
1462
   *                        } else {
1463
   *                        return $value;
1464
   *                        }
1465
   *                        }
1466
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1467
   *                        </code>
1468
   *                        </p>
1469
   *
1470
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1471
   */
1472 1
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really passed a 3rd argument
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1482
1483
  /**
1484
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1485
   *
1486
   * Gets multiple variables and optionally filters them
1487
   *
1488
   * @link  http://php.net/manual/en/function.filter-var-array.php
1489
   *
1490
   * @param array $data       <p>
1491
   *                          An array with string keys containing the data to filter.
1492
   *                          </p>
1493
   * @param mixed $definition [optional] <p>
1494
   *                          An array defining the arguments. A valid key is a string
1495
   *                          containing a variable name and a valid value is either a
1496
   *                          filter type, or an
1497
   *                          array optionally specifying the filter, flags and options.
1498
   *                          If the value is an array, valid keys are filter
1499
   *                          which specifies the filter type,
1500
   *                          flags which specifies any flags that apply to the
1501
   *                          filter, and options which specifies any options that
1502
   *                          apply to the filter. See the example below for a better understanding.
1503
   *                          </p>
1504
   *                          <p>
1505
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1506
   *                          input array are filtered by this filter.
1507
   *                          </p>
1508
   * @param bool  $add_empty  [optional] <p>
1509
   *                          Add missing keys as <b>NULL</b> to the return value.
1510
   *                          </p>
1511
   *
1512
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1513
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1514
   */
1515 1
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // with only $data given, the native function is called with its own defaults
    $a = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($a);
  }
1525
1526
  /**
1527
   * Checks whether finfo is available on the server.
1528
   *
1529
   * @return bool
1530
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1531
   */
1532
  public static function finfo_loaded(): bool
  {
    // the fileinfo extension is detected via its "finfo" class
    $hasFinfo = \class_exists('finfo');

    return $hasFinfo;
  }
1536
1537
  /**
1538
   * Returns the first $n characters of the string.
1539
   *
1540
   * @param string $str      <p>The input string.</p>
1541
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1542
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1543
   *
1544
   * @return string
1545
   */
1546 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      $head = self::substr($str, 0, $n, $encoding);

      if ($head !== false) {
        return $head;
      }
    }

    // non-positive $n or a failed substr()
    return '';
  }
1559
1560
  /**
1561
   * Check if the number of unicode characters is not more than the specified integer.
1562
   *
1563
   * @param string $str      The original string to be checked.
1564
   * @param int    $box_size The size in number of chars to be checked against string.
1565
   *
1566
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1567
   */
1568 1
  public static function fits_inside(string $str, int $box_size): bool
  {
    // length as reported by strlen() of this class (called with its default encoding)
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1572
1573
  /**
1574
   * @param string $str
1575
   * @param bool   $useLower <p>Use uppercase by default, otherwise use lowercase.</p>
1576
   *
1577
   * @return string
1578
   */
1579 17
  private static function fixStrCaseHelper(string $str, $useLower = false): string
  {
    // the only pair handled here: capital / small sharp s
    $upperChars = ['ẞ'];
    $lowerChars = ['ß'];

    // the direction of the replacement depends on the requested case
    if ($useLower === true) {
      return \str_replace($upperChars, $lowerChars, $str);
    }

    return \str_replace($lowerChars, $upperChars, $str);
  }
1605
1606
  /**
1607
   * Try to fix simple broken UTF-8 strings.
1608
   *
1609
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1610
   *
1611
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1612
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1613
   * See: http://en.wikipedia.org/wiki/Windows-1252
1614
   *
1615
   * @param string $str <p>The input string</p>
1616
   *
1617
   * @return string
1618
   */
1619 30
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists, built once per request from the "utf8_fix" data
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      // getData() is reported to possibly return false; fall back to an empty
      // map, so that array_keys() / array_values() always receive an array
      // and the input is then returned unchanged.
      $fixMap = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixMap);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixMap);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1640
1641
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept). A string is
   * decoded and re-encoded again and again until a pass no longer changes it.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      $fixedItems = [];
      foreach ($str as $key => $value) {
        $fixedItems[$key] = self::fix_utf8($value);
      }

      return $fixedItems;
    }

    $current = (string)$str;

    // nothing to do for an empty string
    if ($current === '') {
      return $current;
    }

    // repeat until one decode/encode round-trip leaves the string untouched
    do {
      $previous = $current;
      $decoded = self::utf8_decode($previous, true);
      $current = self::to_utf8($decoded);
    } while ($previous !== $current);

    return $current;
  }
1671
1672
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is flagged as available, its direction class is used first;
   * when that class is neither in the LTR nor in the RTL list, or "IntlChar" is not
   * available, the code point is checked against a hard-coded table of RTL code points.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction classes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    // the class is final, so "self" and "static" resolve to the same class
    $codePoint = self::chr_to_decimal($char);

    // everything outside of 0x5be .. 0x10b7f is treated as LTR
    if (!(0x5be <= $codePoint) || !(0x10b7f >= $codePoint)) {
      return 'LTR';
    }

    // single RTL code points (compared strictly)
    static $rtlSingles = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    // inclusive RTL ranges: [first, last]
    static $rtlRanges = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    if (\in_array($codePoint, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1794
1795
  /**
   * Load a data file from the "data" directory next to this class.
   *
   * The file is looked up as "<directory of this file>/data/<$file>.php" and
   * whatever that file hands back via "require" is returned.
   *
   * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
   *
   * @return mixed|false Will return false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $dataFilePath = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($dataFilePath)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $dataFilePath;
  }
1812
1813
  /**
   * Check for php-support.
   *
   * The support-info is initialized on first use via "self::checkForSupport()".
   *
   * @param string|null $key
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return the stored value, if $key is used and available<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key: hand back everything; unknown (or null-valued) key: null
    return $key === null
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
1839
1840
  /**
   * Build a random string by picking single characters from a pool of possible characters.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string An empty string, if the pool of possible characters is empty.
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    if ($poolSize === 0) {
      return '';
    }

    $randomString = '';

    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $position = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // fall back to mt_rand(), if random_int() throws
        /** @noinspection RandomApiMigrationInspection */
        $position = \mt_rand(0, $poolSize - 1);
      }

      $randomString .= self::substr($possibleChars, $position, 1, $encoding);
    }

    return $randomString;
  }
1873
1874
  /**
   * Create a unique string from a random number, the session-id, the remote / server address
   * (when present in $_SERVER), the extra entropy and "uniqid()".
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // collect the entropy parts one after another (same order as they end up in the string)
    $entropy = (string)\mt_rand();
    $entropy .= \session_id();
    $entropy .= $_SERVER['REMOTE_ADDR'] ?? '';
    $entropy .= $_SERVER['SERVER_ADDR'] ?? '';
    $entropy .= $entropyExtra;

    $identifier = \uniqid($entropy, true);

    if (!$md5) {
      return $identifier;
    }

    return \md5($identifier . $entropy);
  }
1896
1897
  /**
   * Alias for "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1912
1913
  /**
   * Returns true if the string contains a lower case char, false otherwise.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a POSIX "[[:lower:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $lowerCasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowerCasePattern);
  }
1924
1925
  /**
   * Returns true if the string contains an upper case char, false otherwise.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a POSIX "[[:upper:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $upperCasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $upperCasePattern);
  }
1936
1937
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with "hexdec()" and then passed on to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1948
1949
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms (case-insensitive): "U+xxxx", "\uxxxx" or plain "xxxx",
   * each with 4 to 6 hexadecimal digits.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int(string $hexDec)
  {
    if ('' === $hexDec) {
      return false;
    }

    // only real hex digits are allowed: with "[a-z0-9]" an input like "zzzz" or "00g1"
    // matched and intval(..., 16) silently turned it into a wrong code point
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1970
1971
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1986
1987
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * Uses "mb_encode_numericentity()" when that function exists, otherwise every
   * character is encoded on its own via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // keep ASCII: start encoding at the first non-ASCII code point
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // convmap = [start, end, offset, mask]: it must hold a multiple of four
      // elements, the former stray fifth element made the map invalid
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without "mb_encode_numericentity()": encode char by char
    return \implode(
        '',
        \array_map(
            static function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2033
2034
  /**
2035
   * UTF-8 version of html_entity_decode()
2036
   *
2037
   * The reason we are not using html_entity_decode() by itself is because
2038
   * while it is not technically correct to leave out the semicolon
2039
   * at the end of an entity most browsers will still interpret the entity
2040
   * correctly. html_entity_decode() does not convert entities without
2041
   * semicolons, so we are left with our own little solution here. Bummer.
2042
   *
2043
   * Convert all HTML entities to their applicable characters
2044
   *
2045
   * INFO: opposite to UTF8::html_encode()
2046
   *
2047
   * @link http://php.net/manual/en/function.html-entity-decode.php
2048
   *
2049
   * @param string $str      <p>
2050
   *                         The input string.
2051
   *                         </p>
2052
   * @param int    $flags    [optional] <p>
2053
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2054
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2055
   *                         <table>
2056
   *                         Available <i>flags</i> constants
2057
   *                         <tr valign="top">
2058
   *                         <td>Constant Name</td>
2059
   *                         <td>Description</td>
2060
   *                         </tr>
2061
   *                         <tr valign="top">
2062
   *                         <td><b>ENT_COMPAT</b></td>
2063
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2064
   *                         </tr>
2065
   *                         <tr valign="top">
2066
   *                         <td><b>ENT_QUOTES</b></td>
2067
   *                         <td>Will convert both double and single quotes.</td>
2068
   *                         </tr>
2069
   *                         <tr valign="top">
2070
   *                         <td><b>ENT_NOQUOTES</b></td>
2071
   *                         <td>Will leave both double and single quotes unconverted.</td>
2072
   *                         </tr>
2073
   *                         <tr valign="top">
2074
   *                         <td><b>ENT_HTML401</b></td>
2075
   *                         <td>
2076
   *                         Handle code as HTML 4.01.
2077
   *                         </td>
2078
   *                         </tr>
2079
   *                         <tr valign="top">
2080
   *                         <td><b>ENT_XML1</b></td>
2081
   *                         <td>
2082
   *                         Handle code as XML 1.
2083
   *                         </td>
2084
   *                         </tr>
2085
   *                         <tr valign="top">
2086
   *                         <td><b>ENT_XHTML</b></td>
2087
   *                         <td>
2088
   *                         Handle code as XHTML.
2089
   *                         </td>
2090
   *                         </tr>
2091
   *                         <tr valign="top">
2092
   *                         <td><b>ENT_HTML5</b></td>
2093
   *                         <td>
2094
   *                         Handle code as HTML 5.
2095
   *                         </td>
2096
   *                         </tr>
2097
   *                         </table>
2098
   *                         </p>
2099
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2100
   *
2101
   * @return string The decoded string.
2102
   */
2103 22
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // shorter than four bytes: returned as it is, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // fast exit: no "&" at all, or neither "&#" nor ";" in the string
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // the support-info must be initialized before "mbstring" is read from it
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // decode until a full pass does not change the string anymore (nested / double encoded entities)
    do {
      $str_compare = $str;

      // decimal entities with a semicolon, quotes are left encoded in this step
      $decodedDecimal = \preg_replace_callback(
          "/&#\d{2,6};/",
          static function ($matches) use ($encoding) {
            // always fallback via symfony polyfill
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // null signals a PCRE error: keep the current string instead of replacing it by ''
      if ($decodedDecimal !== null) {
        $str = $decodedDecimal;
      }

      // add the missing semicolon to numeric entities, so that the native function can decode them
      $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // null (PCRE error) must not reach html_entity_decode(): TypeError with strict_types
      if ($withSemicolons === null) {
        $withSemicolons = $str;
      }

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $withSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2174
2175
  /**
   * Create an escaped html version of the string via "UTF8::htmlspecialchars()",
   * using the flags "ENT_QUOTES | ENT_SUBSTITUTE".
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2191
2192
  /**
   * Remove empty html-tag.
   *
   * e.g.: <tag></tag>
   *
   * A tag pair counts as empty, if there is nothing or only whitespace between
   * the opening and the closing tag. If the regular expression fails (e.g. the
   * input is not valid UTF-8), the input is returned unchanged.
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    $stripped = \preg_replace(
        "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu",
        '',
        $str
    );

    // preg_replace() returns null on a PCRE error (the "u" modifier rejects invalid UTF-8):
    // casting that null to string would silently throw the whole input away
    if ($stripped === null) {
      return $str;
    }

    return $stripped;
  }
2209
2210
  /**
2211
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2212
   *
2213
   * @link http://php.net/manual/en/function.htmlentities.php
2214
   *
2215
   * @param string $str           <p>
2216
   *                              The input string.
2217
   *                              </p>
2218
   * @param int    $flags         [optional] <p>
2219
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2220
   *                              invalid code unit sequences and the used document type. The default is
2221
   *                              ENT_COMPAT | ENT_HTML401.
2222
   *                              <table>
2223
   *                              Available <i>flags</i> constants
2224
   *                              <tr valign="top">
2225
   *                              <td>Constant Name</td>
2226
   *                              <td>Description</td>
2227
   *                              </tr>
2228
   *                              <tr valign="top">
2229
   *                              <td><b>ENT_COMPAT</b></td>
2230
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2231
   *                              </tr>
2232
   *                              <tr valign="top">
2233
   *                              <td><b>ENT_QUOTES</b></td>
2234
   *                              <td>Will convert both double and single quotes.</td>
2235
   *                              </tr>
2236
   *                              <tr valign="top">
2237
   *                              <td><b>ENT_NOQUOTES</b></td>
2238
   *                              <td>Will leave both double and single quotes unconverted.</td>
2239
   *                              </tr>
2240
   *                              <tr valign="top">
2241
   *                              <td><b>ENT_IGNORE</b></td>
2242
   *                              <td>
2243
   *                              Silently discard invalid code unit sequences instead of returning
2244
   *                              an empty string. Using this flag is discouraged as it
2245
   *                              may have security implications.
2246
   *                              </td>
2247
   *                              </tr>
2248
   *                              <tr valign="top">
2249
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2250
   *                              <td>
2251
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2252
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2253
   *                              </td>
2254
   *                              </tr>
2255
   *                              <tr valign="top">
2256
   *                              <td><b>ENT_DISALLOWED</b></td>
2257
   *                              <td>
2258
   *                              Replace invalid code points for the given document type with a
2259
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2260
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2261
   *                              instance, to ensure the well-formedness of XML documents with
2262
   *                              embedded external content.
2263
   *                              </td>
2264
   *                              </tr>
2265
   *                              <tr valign="top">
2266
   *                              <td><b>ENT_HTML401</b></td>
2267
   *                              <td>
2268
   *                              Handle code as HTML 4.01.
2269
   *                              </td>
2270
   *                              </tr>
2271
   *                              <tr valign="top">
2272
   *                              <td><b>ENT_XML1</b></td>
2273
   *                              <td>
2274
   *                              Handle code as XML 1.
2275
   *                              </td>
2276
   *                              </tr>
2277
   *                              <tr valign="top">
2278
   *                              <td><b>ENT_XHTML</b></td>
2279
   *                              <td>
2280
   *                              Handle code as XHTML.
2281
   *                              </td>
2282
   *                              </tr>
2283
   *                              <tr valign="top">
2284
   *                              <td><b>ENT_HTML5</b></td>
2285
   *                              <td>
2286
   *                              Handle code as HTML 5.
2287
   *                              </td>
2288
   *                              </tr>
2289
   *                              </table>
2290
   *                              </p>
2291
   * @param string $encoding      [optional] <p>
2292
   *                              Like <b>htmlspecialchars</b>,
2293
   *                              <b>htmlentities</b> takes an optional third argument
2294
   *                              <i>encoding</i> which defines encoding used in
2295
   *                              conversion.
2296
   *                              Although this argument is technically optional, you are highly
2297
   *                              encouraged to specify the correct value for your code.
2298
   *                              </p>
2299
   * @param bool   $double_encode [optional] <p>
2300
   *                              When <i>double_encode</i> is turned off PHP will not
2301
   *                              encode existing html entities. The default is to convert everything.
2302
   *                              </p>
2303
   *
2304
   *
2305
   * @return string The encoded string.
2306
   * </p>
2307
   * <p>
2308
   * If the input <i>string</i> contains an invalid code unit
2309
   * sequence within the given <i>encoding</i> an empty string
2310
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2311
   * <b>ENT_SUBSTITUTE</b> flags are set.
2312
   */
2313 7
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    // The native htmlentities() leaves backslashes untouched (they are mostly used for
    // escaping when inserting into a database), so they are replaced here by their
    // html entity equivalent.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // finally turn the remaining non-ASCII chars into numbered entities (ASCII is kept)
    return self::html_encode($encoded, true, $encoding);
  }
2333
2334
  /**
2335
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2336
   *
2337
   * INFO: Take a look at "UTF8::htmlentities()"
2338
   *
2339
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2340
   *
2341
   * @param string $str           <p>
2342
   *                              The string being converted.
2343
   *                              </p>
2344
   * @param int    $flags         [optional] <p>
2345
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2346
   *                              invalid code unit sequences and the used document type. The default is
2347
   *                              ENT_COMPAT | ENT_HTML401.
2348
   *                              <table>
2349
   *                              Available <i>flags</i> constants
2350
   *                              <tr valign="top">
2351
   *                              <td>Constant Name</td>
2352
   *                              <td>Description</td>
2353
   *                              </tr>
2354
   *                              <tr valign="top">
2355
   *                              <td><b>ENT_COMPAT</b></td>
2356
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2357
   *                              </tr>
2358
   *                              <tr valign="top">
2359
   *                              <td><b>ENT_QUOTES</b></td>
2360
   *                              <td>Will convert both double and single quotes.</td>
2361
   *                              </tr>
2362
   *                              <tr valign="top">
2363
   *                              <td><b>ENT_NOQUOTES</b></td>
2364
   *                              <td>Will leave both double and single quotes unconverted.</td>
2365
   *                              </tr>
2366
   *                              <tr valign="top">
2367
   *                              <td><b>ENT_IGNORE</b></td>
2368
   *                              <td>
2369
   *                              Silently discard invalid code unit sequences instead of returning
2370
   *                              an empty string. Using this flag is discouraged as it
2371
   *                              may have security implications.
2372
   *                              </td>
2373
   *                              </tr>
2374
   *                              <tr valign="top">
2375
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2376
   *                              <td>
2377
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2378
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2379
   *                              </td>
2380
   *                              </tr>
2381
   *                              <tr valign="top">
2382
   *                              <td><b>ENT_DISALLOWED</b></td>
2383
   *                              <td>
2384
   *                              Replace invalid code points for the given document type with a
2385
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2386
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2387
   *                              instance, to ensure the well-formedness of XML documents with
2388
   *                              embedded external content.
2389
   *                              </td>
2390
   *                              </tr>
2391
   *                              <tr valign="top">
2392
   *                              <td><b>ENT_HTML401</b></td>
2393
   *                              <td>
2394
   *                              Handle code as HTML 4.01.
2395
   *                              </td>
2396
   *                              </tr>
2397
   *                              <tr valign="top">
2398
   *                              <td><b>ENT_XML1</b></td>
2399
   *                              <td>
2400
   *                              Handle code as XML 1.
2401
   *                              </td>
2402
   *                              </tr>
2403
   *                              <tr valign="top">
2404
   *                              <td><b>ENT_XHTML</b></td>
2405
   *                              <td>
2406
   *                              Handle code as XHTML.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              <tr valign="top">
2410
   *                              <td><b>ENT_HTML5</b></td>
2411
   *                              <td>
2412
   *                              Handle code as HTML 5.
2413
   *                              </td>
2414
   *                              </tr>
2415
   *                              </table>
2416
   *                              </p>
2417
   * @param string $encoding      [optional] <p>
2418
   *                              Defines encoding used in conversion.
2419
   *                              </p>
2420
   *                              <p>
2421
   *                              For the purposes of this function, the encodings
2422
   *                              ISO-8859-1, ISO-8859-15,
2423
   *                              UTF-8, cp866,
2424
   *                              cp1251, cp1252, and
2425
   *                              KOI8-R are effectively equivalent, provided the
2426
   *                              <i>string</i> itself is valid for the encoding, as
2427
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2428
   *                              the same positions in all of these encodings.
2429
   *                              </p>
2430
   * @param bool   $double_encode [optional] <p>
2431
   *                              When <i>double_encode</i> is turned off PHP will not
2432
   *                              encode existing html entities, the default is to convert everything.
2433
   *                              </p>
2434
   *
2435
   * @return string The converted string.
2436
   * </p>
2437
   * <p>
2438
   * If the input <i>string</i> contains an invalid code unit
2439
   * sequence within the given <i>encoding</i> an empty string
2440
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2441
   * <b>ENT_SUBSTITUTE</b> flags are set.
2442
   */
2443 7
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are handed to PHP untouched; every other name is
    // first passed through normalize_encoding() with "UTF-8" as second argument.
    $isPassThroughEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isPassThroughEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Delegate the actual escaping to the native function.
    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2451
2452
  /**
2453
   * Checks whether iconv is available on the server.
2454
   *
2455
   * @return bool
2456
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2457
   */
2458
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so no ternary is needed
    // (this also matches the style of intl_loaded()).
    return \extension_loaded('iconv');
  }
2462
2463
  /**
2464
   * alias for "UTF8::decimal_to_chr()"
2465
   *
2466
   * @see UTF8::decimal_to_chr()
2467
   *
2468
   * @param mixed $int
2469
   *
2470
   * @return string
2471
   */
2472 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the value is forwarded unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2476
2477
  /**
2478
   * Converts Integer to hexadecimal U+xxxx code point representation.
2479
   *
2480
   * INFO: opposite to UTF8::hex_to_int()
2481
   *
2482
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2483
   * @param string $pfix [optional]
2484
   *
2485
   * @return string The code point, or empty string on failure.
2486
   */
2487 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Lower-case hex digits, left-padded with zeros to at least four digits;
    // longer values (e.g. astral code points) are kept at full length.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2495
2496
  /**
2497
   * Checks whether intl-char is available on the server.
2498
   *
2499
   * @return bool
2500
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2501
   */
2502
  public static function intlChar_loaded(): bool
  {
    // The check is done on the class name, not on the extension name.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2506
2507
  /**
2508
   * Checks whether intl is available on the server.
2509
   *
2510
   * @return bool
2511
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2512
   */
2513 3
  public static function intl_loaded(): bool
  {
    // Reports whether the "intl" extension is loaded in this PHP process.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2517
2518
  /**
2519
   * alias for "UTF8::is_ascii()"
2520
   *
2521
   * @see        UTF8::is_ascii()
2522
   *
2523
   * @param string $str
2524
   *
2525
   * @return bool
2526
   *
2527
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2528
   */
2529 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias; the result of is_ascii() is returned as-is.
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2533
2534
  /**
2535
   * alias for "UTF8::is_base64()"
2536
   *
2537
   * @see        UTF8::is_base64()
2538
   *
2539
   * @param string $str
2540
   *
2541
   * @return bool
2542
   *
2543
   * @deprecated <p>use "UTF8::is_base64()"</p>
2544
   */
2545 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias; the result of is_base64() is returned as-is.
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2549
2550
  /**
2551
   * alias for "UTF8::is_binary()"
2552
   *
2553
   * @see        UTF8::is_binary()
2554
   *
2555
   * @param mixed $str
2556
   * @param bool  $strict
2557
   *
2558
   * @return bool
2559
   *
2560
   * @deprecated <p>use "UTF8::is_binary()"</p>
2561
   */
2562 2
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias for is_binary().
    //
    // $strict is untyped here, but is_binary() declares it as "bool" and this
    // file runs with strict_types=1, so forwarding e.g. 1 or "1" unchanged
    // would raise a TypeError. Normalise the flag before delegating.
    return self::is_binary($str, (bool)$strict);
  }
2566
2567
  /**
2568
   * alias for "UTF8::is_bom()"
2569
   *
2570
   * @see        UTF8::is_bom()
2571
   *
2572
   * @param string $utf8_chr
2573
   *
2574
   * @return bool
2575
   *
2576
   * @deprecated <p>use "UTF8::is_bom()"</p>
2577
   */
2578 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias; the result of is_bom() is returned as-is.
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2582
2583
  /**
2584
   * alias for "UTF8::is_html()"
2585
   *
2586
   * @see        UTF8::is_html()
2587
   *
2588
   * @param string $str
2589
   *
2590
   * @return bool
2591
   *
2592
   * @deprecated <p>use "UTF8::is_html()"</p>
2593
   */
2594 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias; the result of is_html() is returned as-is.
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2598
2599
  /**
2600
   * alias for "UTF8::is_json()"
2601
   *
2602
   * @see        UTF8::is_json()
2603
   *
2604
   * @param string $str
2605
   *
2606
   * @return bool
2607
   *
2608
   * @deprecated <p>use "UTF8::is_json()"</p>
2609
   */
2610
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias; the result of is_json() is returned as-is.
    $isJson = self::is_json($str);

    return $isJson;
  }
2614
2615
  /**
2616
   * alias for "UTF8::is_utf16()"
2617
   *
2618
   * @see        UTF8::is_utf16()
2619
   *
2620
   * @param string $str
2621
   *
2622
   * @return int|false
2623
   *                    <strong>false</strong> if it is not UTF-16,<br>
2624
   *                    <strong>1</strong> for UTF-16LE,<br>
2625
   *                    <strong>2</strong> for UTF-16BE.
2626
   *
2627
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2628
   */
2629 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias; whatever is_utf16() yields (int or false)
    // is passed straight back to the caller.
    $result = self::is_utf16($str);

    return $result;
  }
2633
2634
  /**
2635
   * alias for "UTF8::is_utf32()"
2636
   *
2637
   * @see        UTF8::is_utf32()
2638
   *
2639
   * @param string $str
2640
   *
2641
   * @return int|false
2642
   *                   <strong>false</strong> if it is not UTF-32,
2643
   *                   <strong>1</strong> for UTF-32LE,
2644
   *                   <strong>2</strong> for UTF-32BE.
2645
   *
2646
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2647
   */
2648 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias; whatever is_utf32() yields (int or false)
    // is passed straight back to the caller.
    $result = self::is_utf32($str);

    return $result;
  }
2652
2653
  /**
2654
   * alias for "UTF8::is_utf8()"
2655
   *
2656
   * @see        UTF8::is_utf8()
2657
   *
2658
   * @param string $str
2659
   * @param bool   $strict
2660
   *
2661
   * @return bool
2662
   *
2663
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2664
   */
2665 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias for is_utf8().
    //
    // $strict is untyped here, but is_utf8() declares it as "bool" and this
    // file runs with strict_types=1, so forwarding e.g. 1 or "1" unchanged
    // would raise a TypeError. Normalise the flag before delegating.
    return self::is_utf8($str, (bool)$strict);
  }
2669
2670
  /**
2671
   * Returns true if the string contains only alphabetic chars, false otherwise.
2672
   *
2673
   * @param string $str
2674
   *
2675
   * @return bool
2676
   *               Whether or not $str contains only alphabetic chars.
2677
   */
2678 10
  public static function is_alpha(string $str): bool
  {
    // Anchored pattern: zero or more characters of the POSIX class [:alpha:].
    // The matching itself is done by str_matches_pattern().
    $pattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2682
2683
  /**
2684
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2685
   *
2686
   * @param string $str
2687
   *
2688
   * @return bool
2689
   *               Whether or not $str contains only alphanumeric chars.
2690
   */
2691 13
  public static function is_alphanumeric(string $str): bool
  {
    // Anchored pattern: zero or more characters of the POSIX class [:alnum:].
    // The matching itself is done by str_matches_pattern().
    $pattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2695
2696
  /**
2697
   * Checks if a string is 7 bit ASCII.
2698
   *
2699
   * @param string $str <p>The string to check.</p>
2700
   *
2701
   * @return bool
2702
   *              <strong>true</strong> if it is ASCII<br>
2703
   *              <strong>false</strong> otherwise
2704
   *
2705
   */
2706 174
  public static function is_ascii(string $str): bool
  {
    // The empty string is treated as ASCII without running the regex.
    if ($str === '') {
      return true;
    }

    // The string passes unless it contains a byte outside the allowed set:
    // 0x09, 0x10, 0x13, 0x0A, 0x0D and the printable range 0x20-0x7E.
    // NOTE(review): 0x10 / 0x13 look like decimal 10 / 13 (LF / CR) written
    // as hex; left as-is here because changing them would alter behaviour.
    $hasForbiddenByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return $hasForbiddenByte !== 1;
  }
2714
2715
  /**
2716
   * Returns true if the string is base64 encoded, false otherwise.
2717
   *
2718
   * @param string $str <p>The input string.</p>
2719
   *
2720
   * @return bool Whether or not $str is base64 encoded.
2721
   */
2722 8
  public static function is_base64(string $str): bool
  {
    // Strict mode makes base64_decode() return false for characters outside
    // the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare explicitly instead of relying on truthiness: a decoded payload
    // of "0" (input 'MA==') is falsy in PHP but is perfectly valid base64.
    // An empty payload (e.g. empty input) is still reported as "not base64".
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Only canonical encodings are accepted: re-encoding must reproduce the input.
    return \base64_encode($decoded) === $str;
  }
2728
2729
  /**
2730
   * Check if the input is binary (heuristic check; it looks like a hack).
2731
   *
2732
   * @param mixed $input
2733
   * @param bool  $strict
2734
   *
2735
   * @return bool
2736
   */
2737 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // A string consisting solely of "0" and "1" characters counts as binary.
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    // More than 30% NUL bytes counts as binary. $data is non-empty at this
    // point, so its length is a safe divisor.
    $nullByteCount = \substr_count($data, "\x0");
    if (($nullByteCount / \strlen($data)) > 0.3) {
      return true;
    }

    // Non-strict mode: any NUL byte at all is enough.
    if ($strict !== true) {
      return $nullByteCount > 0;
    }

    // Strict mode: ask ext-fileinfo for the encoding of the buffer.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(FILEINFO_MIME_ENCODING);

    return $detector->buffer($data) === 'binary';
  }
2782
2783
  /**
2784
   * Check if the file is binary.
2785
   *
2786
   * @param string $file
2787
   *
2788
   * @return bool
2789
   */
2790 3
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');

    // A file that cannot be opened is reported as "not binary".
    if (\is_resource($handle) === false) {
      return false;
    }

    // Only the leading 512 bytes are inspected.
    $head = \fread($handle, 512);
    \fclose($handle);

    // Nothing was read: not binary.
    if ($head === '') {
      return false;
    }

    // Strict check of the sampled bytes.
    return self::is_binary($head, true);
  }
2807
2808
  /**
2809
   * Returns true if the string contains only whitespace chars, false otherwise.
2810
   *
2811
   * @param string $str
2812
   *
2813
   * @return bool
2814
   *               Whether or not $str contains only whitespace characters.
2815
   */
2816 15
  public static function is_blank(string $str): bool
  {
    // Anchored pattern: zero or more characters of the POSIX class [:space:].
    // The matching itself is done by str_matches_pattern().
    $pattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2820
2821
  /**
2822
   * Checks if the given string is equal to any "Byte Order Mark".
2823
   *
2824
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2825
   *
2826
   * @param string $str <p>The input string.</p>
2827
   *
2828
   * @return bool
2829
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
2830
   */
2831 1
  public static function is_bom($str): bool
  {
    // Only the keys of the BOM table (the byte sequences) matter here; the
    // stored byte lengths are ignored. The comparison is strict, so the
    // input has to be identical to one of the keys.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2841
2842
  /**
2843
   * Determine whether the string is considered to be empty.
2844
   *
2845
   * A variable is considered empty if it does not exist or if its value equals FALSE.
2846
   * empty() does not generate a warning if the variable does not exist.
2847
   *
2848
   * @param mixed $str
2849
   *
2850
   * @return bool Whether or not $str is empty().
2851
   */
2852
  public static function is_empty($str): bool
  {
    // For a parameter (which always exists) empty() is the same as a
    // negated boolean cast: "", "0", 0, 0.0, [], null and false are empty.
    return !$str;
  }
2856
2857
  /**
2858
   * Returns true if the string contains only hexadecimal chars, false otherwise.
2859
   *
2860
   * @param string $str
2861
   *
2862
   * @return bool
2863
   *               Whether or not $str contains only hexadecimal chars.
2864
   */
2865 13
  public static function is_hexadecimal(string $str): bool
  {
    // Anchored pattern: zero or more characters of the POSIX class [:xdigit:].
    // The matching itself is done by str_matches_pattern().
    $pattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2869
2870
  /**
2871
   * Check if the string contains any html-tags <lall>.
2872
   *
2873
   * @param string $str <p>The input string.</p>
2874
   *
2875
   * @return bool
2876
   */
2877 2
  public static function is_html(string $str): bool
  {
    // The empty string never contains a tag.
    if ($str === '') {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, optionally carrying
    // attributes (bare, or with a double-quoted / single-quoted / unquoted value).
    $tagFound = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $tagFound === 1;
  }
2890
2891
  /**
2892
   * Try to check if "$str" is a JSON string.
2893
   *
2894
   * @param string $str <p>The input string.</p>
2895
   *
2896
   * @return bool
2897
   */
2898 21
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Populate the feature table on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; everything else is rejected
    // before the decoder's error state is even consulted.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
2923
2924
  /**
2925
   * @param string $str
2926
   *
2927
   * @return bool
2928
   */
2929 8
  public static function is_lowercase(string $str): bool
  {
    // Anchored pattern: zero or more characters of the POSIX class [:lower:].
    // The matching itself is done by str_matches_pattern(), whose boolean
    // result is returned directly.
    $pattern = '^[[:lower:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2937
2938
  /**
2939
   * Returns true if the string is serialized, false otherwise.
2940
   *
2941
   * @param string $str
2942
   *
2943
   * @return bool Whether or not $str is serialized.
2944
   */
2945 7
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // A serialized "false" unserializes to false, which is indistinguishable
    // from the failure value, so it is recognised by its literal form.
    if ($str === 'b:0;') {
      return true;
    }

    // SECURITY: this is only a format probe, so no class may be instantiated
    // while decoding. With "allowed_classes => false" objects are decoded as
    // __PHP_Incomplete_Class, which still counts as "serialized" here, but no
    // __wakeup() / __unserialize() / __destruct() of real classes is run.
    // NOTE(review): the input is still handed to unserialize(); callers should
    // avoid probing very large untrusted payloads.
    // Failures are silenced on purpose: invalid input is the expected
    // "not serialized" case.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2957
2958
  /**
2959
   * Returns true if the string contains only upper case chars, false
2960
   * otherwise.
2961
   *
2962
   * @param string $str <p>The input string.</p>
2963
   *
2964
   * @return bool
2965
   *               Whether or not $str contains only upper case characters.
2966
   */
2967 8
  public static function is_uppercase(string $str): bool
  {
    // Anchored pattern: zero or more characters of the POSIX class [:upper:].
    // The matching itself is done by str_matches_pattern().
    $pattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2971
2972
  /**
2973
   * Check if the string is UTF-16.
2974
   *
2975
   * @param string $str <p>The input string.</p>
2976
   *
2977
   * @return int|false
2978
   *                   <strong>false</strong> if it is not UTF-16,<br>
2979
   *                   <strong>1</strong> for UTF-16LE,<br>
2980
   *                   <strong>2</strong> for UTF-16BE.
2981
   */
2982 10
  public static function is_utf16(string $str)
  {
    // Input that does not pass the (non-strict) binary check is never
    // reported as UTF-16.
    if (self::is_binary($str) === false) {
      return false;
    }

    // The non-strict is_binary() call above does not populate the feature
    // table, so make sure it is initialised before it is read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Result of count_chars() for the BOM-free input, computed lazily on the
    // first byte order that survives the round trip.
    $strChars = [];

    // One score per byte order; the same procedure is applied to both.
    $score = [];
    foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
      $score[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Round trip UTF-8 -> UTF-16 -> UTF-8; the result must be stable.
      $backToUtf16 = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // Every key of count_chars($roundTrip) that is found (strict
      // comparison) among the values of $strChars adds one point.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means "cannot tell" and is reported as false.
    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE.
    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
3041
3042
  /**
3043
   * Check if the string is UTF-32.
3044
   *
3045
   * @param string $str
3046
   *
3047
   * @return int|false
3048
   *                   <strong>false</strong> if it is not UTF-32,<br>
3049
   *                   <strong>1</strong> for UTF-32LE,<br>
3050
   *                   <strong>2</strong> for UTF-32BE.
3051
   */
3052 8
  public static function is_utf32(string $str)
  {
    // Input that does not pass the (non-strict) binary check is never
    // reported as UTF-32.
    if (self::is_binary($str) === false) {
      return false;
    }

    // The non-strict is_binary() call above does not populate the feature
    // table, so make sure it is initialised before it is read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Result of count_chars() for the BOM-free input, computed lazily on the
    // first byte order that survives the round trip.
    $strChars = [];

    // One score per byte order; the same procedure is applied to both.
    $score = [];
    foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
      $score[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Round trip UTF-8 -> UTF-32 -> UTF-8; the result must be stable.
      $backToUtf32 = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // Every key of count_chars($roundTrip) that is found (strict
      // comparison) among the values of $strChars adds one point.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means "cannot tell" and is reported as false.
    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE.
    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3111
3112
  /**
3113
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3114
   *
3115
   * @see    http://hsivonen.iki.fi/php-utf8/
3116
   *
3117
   * @param string|string[] $str    <p>The string to be checked.</p>
3118
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3119
   *
3120
   * @return bool
3121
   */
3122 92
  public static function is_utf8($str, bool $strict = false): bool
3123
  {
3124 92
    if (\is_array($str) === true) {
3125 1
      foreach ($str as $k => $v) {
3126 1
        if (false === self::is_utf8($v, $strict)) {
3127 1
          return false;
3128
        }
3129
      }
3130
3131
      return true;
3132
    }
3133
3134 92
    if ('' === $str) {
3135 11
      return true;
3136
    }
3137
3138 88
    if ($strict === true) {
3139 1
      if (self::is_utf16($str) !== false) {
3140 1
        return false;
3141
      }
3142
3143
      if (self::is_utf32($str) !== false) {
3144
        return false;
3145
      }
3146
    }
3147
3148 88
    if (self::pcre_utf8_support() !== true) {
3149
3150
      // If even just the first character can be matched, when the /u
3151
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3152
      // invalid, nothing at all will match, even if the string contains
3153
      // some valid sequences
3154
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3155
    }
3156
3157 88
    $mState = 0; // cached expected number of octets after the current octet
3158
    // until the beginning of the next UTF8 character sequence
3159 88
    $mUcs4 = 0; // cached Unicode character
3160 88
    $mBytes = 1; // cached expected number of octets in the current sequence
3161
3162 88
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3163
      self::checkForSupport();
3164
    }
3165
3166 88
    if (self::$ORD === null) {
3167
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3168
    }
3169
3170 88
    $len = self::strlen_in_byte((string)$str);
3171
    /** @noinspection ForeachInvariantsInspection */
3172 88
    for ($i = 0; $i < $len; $i++) {
3173 88
      $in = self::$ORD[$str[$i]];
3174 88
      if ($mState === 0) {
3175
        // When mState is zero we expect either a US-ASCII character or a
3176
        // multi-octet sequence.
3177 88
        if (0 === (0x80 & $in)) {
3178
          // US-ASCII, pass straight through.
3179 85
          $mBytes = 1;
3180 69
        } elseif (0xC0 === (0xE0 & $in)) {
3181
          // First octet of 2 octet sequence.
3182 62
          $mUcs4 = $in;
3183 62
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3184 62
          $mState = 1;
3185 62
          $mBytes = 2;
3186 46
        } elseif (0xE0 === (0xF0 & $in)) {
3187
          // First octet of 3 octet sequence.
3188 30
          $mUcs4 = $in;
3189 30
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3190 30
          $mState = 2;
3191 30
          $mBytes = 3;
3192 23
        } elseif (0xF0 === (0xF8 & $in)) {
3193
          // First octet of 4 octet sequence.
3194 13
          $mUcs4 = $in;
3195 13
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3196 13
          $mState = 3;
3197 13
          $mBytes = 4;
3198 11
        } elseif (0xF8 === (0xFC & $in)) {
3199
          /* First octet of 5 octet sequence.
3200
          *
3201
          * This is illegal because the encoded codepoint must be either
3202
          * (a) not the shortest form or
3203
          * (b) outside the Unicode range of 0-0x10FFFF.
3204
          * Rather than trying to resynchronize, we will carry on until the end
3205
          * of the sequence and let the later error handling code catch it.
3206
          */
3207 4
          $mUcs4 = $in;
3208 4
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3209 4
          $mState = 4;
3210 4
          $mBytes = 5;
3211 8
        } elseif (0xFC === (0xFE & $in)) {
3212
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3213 4
          $mUcs4 = $in;
3214 4
          $mUcs4 = ($mUcs4 & 1) << 30;
3215 4
          $mState = 5;
3216 4
          $mBytes = 6;
3217
        } else {
3218
          // Current octet is neither in the US-ASCII range nor a legal first
3219
          // octet of a multi-octet sequence.
3220 88
          return false;
3221
        }
3222
      } else {
3223
        // When mState is non-zero, we expect a continuation of the multi-octet
3224
        // sequence
3225 69
        if (0x80 === (0xC0 & $in)) {
3226
          // Legal continuation.
3227 63
          $shift = ($mState - 1) * 6;
3228 63
          $tmp = $in;
3229 63
          $tmp = ($tmp & 0x0000003F) << $shift;
3230 63
          $mUcs4 |= $tmp;
3231
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3232
          // Unicode code point to be output.
3233 63
          if (0 === --$mState) {
3234
            // Check for illegal sequences and code points.
3235
            //
3236
            // From Unicode 3.1, non-shortest form is illegal
3237
            if (
3238 63
                (2 === $mBytes && $mUcs4 < 0x0080)
3239
                ||
3240 63
                (3 === $mBytes && $mUcs4 < 0x0800)
3241
                ||
3242 63
                (4 === $mBytes && $mUcs4 < 0x10000)
3243
                ||
3244 63
                (4 < $mBytes)
3245
                ||
3246
                // From Unicode 3.2, surrogate characters are illegal.
3247 63
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3248
                ||
3249
                // Code points outside the Unicode range are illegal.
3250 63
                ($mUcs4 > 0x10FFFF)
3251
            ) {
3252 7
              return false;
3253
            }
3254
            // initialize UTF8 cache
3255 63
            $mState = 0;
3256 63
            $mUcs4 = 0;
3257 63
            $mBytes = 1;
3258
          }
3259
        } else {
3260
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3261
          // Incomplete multi-octet sequence.
3262 28
          return false;
3263
        }
3264
      }
3265
    }
3266
3267 56
    return true;
3268
  }
3269
3270
  /**
3271
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3272
   * Decodes a JSON string
3273
   *
3274
   * @link http://php.net/manual/en/function.json-decode.php
3275
   *
3276
   * @param string $json    <p>
3277
   *                        The <i>json</i> string being decoded.
3278
   *                        </p>
3279
   *                        <p>
3280
   *                        This function only works with UTF-8 encoded strings.
3281
   *                        </p>
3282
   *                        <p>PHP implements a superset of
3283
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3284
   *                        only supports these values when they are nested inside an array or an object.
3285
   *                        </p>
3286
   * @param bool   $assoc   [optional] <p>
3287
   *                        When <b>TRUE</b>, returned objects will be converted into
3288
   *                        associative arrays.
3289
   *                        </p>
3290
   * @param int    $depth   [optional] <p>
3291
   *                        User specified recursion depth.
3292
   *                        </p>
3293
   * @param int    $options [optional] <p>
3294
   *                        Bitmask of JSON decode options. Currently only
3295
   *                        <b>JSON_BIGINT_AS_STRING</b>
3296
   *                        is supported (default is to cast large integers as floats)
3297
   *                        </p>
3298
   *
3299
   * @return mixed
3300
   *                The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3301
   *                null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3302
   *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3303
   *                is deeper than the recursion limit.
3304
   */
3305 21
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // Pass the input through self::filter() before it reaches the decoder.
    $filtered = self::filter($json);

    // Run the one-time support detection if it has not happened yet.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filtered, $assoc, $depth, $options);
  }
3322
3323
  /**
3324
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3325
   * Returns the JSON representation of a value.
3326
   *
3327
   * @link http://php.net/manual/en/function.json-encode.php
3328
   *
3329
   * @param mixed $value   <p>
3330
   *                       The <i>value</i> being encoded. Can be any type except
3331
   *                       a resource.
3332
   *                       </p>
3333
   *                       <p>
3334
   *                       All string data must be UTF-8 encoded.
3335
   *                       </p>
3336
   *                       <p>PHP implements a superset of
3337
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3338
   *                       only supports these values when they are nested inside an array or an object.
3339
   *                       </p>
3340
   * @param int   $options [optional] <p>
3341
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3342
   *                       <b>JSON_HEX_TAG</b>,
3343
   *                       <b>JSON_HEX_AMP</b>,
3344
   *                       <b>JSON_HEX_APOS</b>,
3345
   *                       <b>JSON_NUMERIC_CHECK</b>,
3346
   *                       <b>JSON_PRETTY_PRINT</b>,
3347
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3348
   *                       <b>JSON_FORCE_OBJECT</b>,
3349
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3350
   *                       constants is described on
3351
   *                       the JSON constants page.
3352
   *                       </p>
3353
   * @param int   $depth   [optional] <p>
3354
   *                       Set the maximum depth. Must be greater than zero.
3355
   *                       </p>
3356
   *
3357
   * @return string|false
3358
   *                      A JSON encoded <strong>string</strong> on success or<br>
3359
   *                      <strong>FALSE</strong> on failure.
3360
   */
3361 2
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Pass the value through self::filter() before it reaches the encoder.
    $filtered = self::filter($value);

    // Run the one-time support detection if it has not happened yet.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filtered, $options, $depth);
  }
3378
3379
  /**
3380
   * Checks whether JSON is available on the server.
3381
   *
3382
   * @return bool
3383
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
3384
   */
3385
  public static function json_loaded(): bool
  {
    // JSON support is detected by probing for the decoder function.
    $decoderExists = \function_exists('json_decode');

    return $decoderExists;
  }
3389
3390
  /**
3391
   * Makes string's first char lowercase.
3392
   *
3393
   * @param string $str       <p>The input string</p>
3394
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
3395
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3396
   *
3397
   * @return string The resulting string.
3398
   */
3399 44
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; substr() may signal failure with false.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($tail === false) {
      $tail = '';
    }

    // Only the first character is lower-cased.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3414
3415
  /**
3416
   * alias for "UTF8::lcfirst()"
3417
   *
3418
   * @see UTF8::lcfirst()
3419
   *
3420
   * @param string $str
3421
   * @param string $encoding
3422
   * @param bool   $cleanUtf8
3423
   *
3424
   * @return string
3425
   */
3426 1
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all work is done by lcfirst().
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowered;
  }
3430
3431
  /**
3432
   * Lowercase for all words in the string.
3433
   *
3434
   * @param string   $str        <p>The input string.</p>
3435
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3436
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3437
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3438
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3439
   *
3440
   * @return string
3441
   */
3442 1
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against the empty string explicitly: a loose "!$str" check
    // would also reject the valid input "0", because "0" is falsy in PHP.
    if ('' === $str) {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip only truly empty fragments; a fragment consisting of "0" must
      // be kept, otherwise it silently disappears from the result.
      if ((string)$word === '') {
        continue;
      }

      // Words listed in $exceptions are copied through untouched.
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The fragments are glued back together without a separator.
    return \implode('', $newWords);
  }
3480
3481
  /**
3482
   * alias for "UTF8::lcfirst()"
3483
   *
3484
   * @see UTF8::lcfirst()
3485
   *
3486
   * @param string $str
3487
   * @param string $encoding
3488
   * @param bool   $cleanUtf8
3489
   *
3490
   * @return string
3491
   */
3492 5
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all work is done by lcfirst().
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowered;
  }
3496
3497
  /**
3498
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3499
   *
3500
   * @param string $str   <p>The string to be trimmed</p>
3501
   * @param mixed  $chars <p>Optional characters to be stripped</p>
3502
   *
3503
   * @return string The string with unwanted characters stripped from the left.
3504
   */
3505 21
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "No char list given" is signalled by INF or any empty value. The string
    // "0" is falsy in PHP but is a legitimate char list (strip leading zeros),
    // so it is excluded from the emptiness check explicitly.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      // Default: strip Unicode separators (\pZ) and "other"/control chars (\pC).
      $pattern = "^[\pZ\pC]+";
    } else {
      // Escape the user-supplied chars so they are literal inside the class.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3521
3522
  /**
3523
   * Returns the UTF-8 character with the maximum code point in the given data.
3524
   *
3525
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3526
   *
3527
   * @return string|null The character with the highest code point; returns null on failure or empty input.
3528
   */
3529 1
  public static function max($arg)
  {
    // An array of strings is flattened into one string first.
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);

    // No code points: nothing to compare.
    if (\count($codepoints) === 0) {
      return null;
    }

    // Convert the largest code point back into its character.
    return self::chr(\max($codepoints));
  }
3544
3545
  /**
3546
   * Calculates and returns the maximum number of bytes taken by any
3547
   * UTF-8 encoded character in the given string.
3548
   *
3549
   * @param string $str <p>The original Unicode string.</p>
3550
   *
3551
   * @return int Max byte lengths of the given chars.
3552
   */
3553 1
  public static function max_chr_width(string $str): int
  {
    $byteSizes = self::chr_size_list($str);

    // An empty size list yields a width of zero.
    if (\count($byteSizes) === 0) {
      return 0;
    }

    return (int)\max($byteSizes);
  }
3562
3563
  /**
3564
   * Checks whether mbstring is available on the server.
3565
   *
3566
   * @return bool
3567
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
3568
   */
3569 14
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    // Side effect: when mbstring is present, its internal encoding is switched to UTF-8.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3579
3580
  /**
3581
   * Checks whether mbstring "overloaded" is active on the server.
3582
   *
3583
   * @return bool
3584
   */
3585
  private static function mbstring_overloaded(): bool
  {
    // INI directive 'mbstring.func_overload' is deprecated since PHP 7.2

    // Without the constant there is no overload flag to test against.
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $stringOverloadBit = @\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING;

    return (bool)$stringOverloadBit;
  }
3597
3598
  /**
3599
   * Returns the UTF-8 character with the minimum code point in the given data.
3600
   *
3601
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3602
   *
3603
   * @return string|null The character with the lowest code point; returns null on failure or empty input.
3604
   */
3605 1
  public static function min($arg)
  {
    // An array of strings is flattened into one string first.
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);

    // No code points: nothing to compare.
    if (\count($codepoints) === 0) {
      return null;
    }

    // Convert the smallest code point back into its character.
    return self::chr(\min($codepoints));
  }
3620
3621
  /**
3622
   * alias for "UTF8::normalize_encoding()"
3623
   *
3624
   * @see        UTF8::normalize_encoding()
3625
   *
3626
   * @param string $encoding
3627
   * @param mixed  $fallback
3628
   *
3629
   * @return mixed
3630
   *
3631
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3632
   */
3633 1
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    // Deprecated alias: all work is done by normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3637
3638
  /**
3639
   * Normalize the encoding-"name" input.
3640
   *
3641
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3642
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3643
   *
3644
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)
3645
   */
3646 325
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    // Per-request memo: raw input name => normalized name.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    // Fast path for the by far most common input.
    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // getData() can report failure with false. Store an empty list in that
      // case so the property keeps its "null|array" contract and in_array()
      // below always receives an array (a non-array haystack is a TypeError
      // on PHP 8).
      $knownEncodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($knownEncodings) ? $knownEncodings : [];
    }

    // Already a known canonical name: return it unchanged.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Lookup key: upper-cased name with everything except letters, digits and
    // whitespace removed. preg_replace() returns null on a PCRE error, hence
    // the cast to keep the array key a string.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Unknown aliases fall through as the upper-cased input.
    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Memoize under the caller's original spelling.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3760
3761
  /**
3762
   * Standardize line ending to unix-like.
3763
   *
3764
   * @param string $str
3765
   *
3766
   * @return string
3767
   */
3768 3
  public static function normalize_line_ending(string $str): string
  {
    // strtr() with an array prefers the longest key at each position, so a
    // "\r\n" pair collapses to one "\n" and a lone "\r" becomes "\n".
    $unixEndings = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return \strtr($str, $unixEndings);
  }
3772
3773
  /**
3774
   * Normalize some MS Word special characters.
3775
   *
3776
   * @param string $str <p>The string to be normalized.</p>
3777
   *
3778
   * @return string
3779
   */
3780 36
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search/replace lists are derived once and then reused on every call.
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // getData() can report failure with false. Store an empty map in that
        // case so the property keeps its "null|array" contract and
        // array_keys()/array_values() below always receive an array (a
        // non-array argument is a TypeError on PHP 8).
        $mswordMap = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($mswordMap) ? $mswordMap : [];
      }

      $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    // With an empty map this is a no-op and the input is returned unchanged.
    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3801
3802
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE is replaced by a plain space; unless
   * requested otherwise, the entries of self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one whitespace list per "$keepNonBreakingSpace" variant (index 0 / 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ('' === $str) {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3844
3845
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input.
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    // results are memoized per "original input + (normalized) encoding"
    static $CHAR_CACHE = [];

    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // still not UTF-8 after normalizing the name: convert the input itself
      // (always fallback via symfony polyfill)
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // manual decoding: look at (at most) the first four bytes, indexed 1..4 by unpack()
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all => 0)
      $codePoint = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = (int)$codePoint;
  }
3913
3914
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope,
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool Will return <strong>false</strong> if php can't parse the string or $result ends up empty.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // always fallback via symfony polyfill
    $parsed = \mb_parse_str($input, $result);

    if ($parsed === false) {
      return false;
    }

    return !empty($result);
  }
3939
3940
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // An empty pattern with the "u" modifier matches the empty string (result: 1)
    // when the modifier is supported; any failure is silenced and yields a falsy value.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @\preg_match('//u', '');

    return (bool)$matchResult;
  }
3952
3953
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits => code point,
   * hexadecimal digits => code point via self::hex_to_int(), anything else => self::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] <p>Empty array if a boundary is falsy or resolves to code point 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // shared conversion for both boundaries
    $toCodePoint = static function ($boundary): int {
      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    // the end boundary is only resolved once the start boundary turned out to be usable
    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    $codePoints = \range($start, $end);

    return \array_map([self::class, 'chr'], $codePoints);
  }
4009
4010
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // non-standard "%uXXXX" escapes are rewritten into hexadecimal html entities,
    // so the entity decoder below can resolve them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // decode until nothing changes anymore (or only once, if $multi_decode is off)
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4058
4059
  /**
   * Filter a list of strings.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values which are empty after "trim()".</p>
   * @param int   $removeShortValues [optional] <p>Drop values whose length (via self::strlen()) is
   *                                 less than or equal to this value. Default: null (= keep all)</p>
   *
   * @return array <p>The remaining strings, re-indexed from 0.</p>
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      // the length check runs first; the emptiness check is only reached for long enough values
      $isTooShort = $removeShortValues !== null && self::strlen($candidate) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && \trim($candidate) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
4093
4094
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always compiled with the "u" (UTF-8) modifier.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string <p>The result, or an empty string if "preg_replace()" did not return a string.</p>
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to "ms"
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback for an empty (falsy) delimiter
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4124
4125
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4140
4141
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM is tested in order against the start of the
   * (possibly already shortened) string, so several leading BOMs can be removed in one call.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // "CP850" makes the helpers work on bytes instead of characters
      if (self::strpos($str, $bomString, 0, 'CP850') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, 'CP850');

      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4166
4167
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "  " for $what = " ") is collapsed
   * into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Replace with the captured group instead of the raw $item: the raw value would be
        // parsed as a replacement template, so needles like '$0' or '\0' would be expanded
        // to the whole match and the duplicates would survive.
        $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4190
4191
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, in the format
   *                              expected by "strip_tags()". Default: '' (strip all tags)</p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $stripped = \strip_tags($str, $allowableTags);

    return $stripped;
  }
4205
4206
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Inserted once per removed break. Default is an empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" is listed before "\r" and "\n", so a Windows line break counts as ONE break.
    // (A stray "/" in front of "\r\n" previously made that alternative match only "/\r\n":
    // it swallowed a literal slash and let a plain "\r\n" be replaced twice.)
    // "U" makes the quantifiers lazy, so "<br.*/?>" stops at the first ">".
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4218
4219
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * NOTE: The replacement runs repeatedly until nothing matches anymore, so a $replacement
   *       which itself contains a removable sequence must not be used.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form ("%00", "%1F", ...). Default: true</p>
   * @param string $replacement [optional] <p>Inserted for every removed sequence. Default: ''</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding are case-insensitive ("%0b" === "%0B"),
      // hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because removing one sequence can form a new one (e.g. "%0%000" => "%00")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4252
4253
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    $prefixLength = self::strlen($substring, $encoding);

    // strlen() may report false; there is no usable offset then, so leave the string untouched
    if ($prefixLength === false) {
      return $str;
    }

    return (string)self::substr(
        $str,
        $prefixLength,
        null,
        $encoding
    );
  }
4276
4277
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    $strLength = self::strlen($str, $encoding);
    $suffixLength = self::strlen($substring, $encoding);

    // strlen() may report false; the cut position is unknown then, so leave the string untouched
    if ($strLength === false || $suffixLength === false) {
      return $str;
    }

    // the lengths were measured in $encoding, so the cut has to use the same encoding
    return (string)self::substr(
        $str,
        0,
        $strLength - $suffixLength,
        $encoding
    );
  }
4299
4300
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to self::str_replace() or, for the case-insensitive variant, self::str_ireplace().
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4318
4319
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * Delegates to self::str_replace() or, for the case-insensitive variant, self::str_ireplace().
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4337
4338
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    [optional] <p>The replacement character. Invalid byte sequences are
   *                                   replaced by its first character only, because "mb_substitute_character()"
   *                                   takes a single code point. Default: '' (remove)</p>
   * @param bool   $processInvalidUtf8 [optional] <p>Also convert invalid UTF-8 chars. Default: true</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // "mb_substitute_character()" accepts a Unicode code point (int) or a keyword
      // like "none"; the raw replacement string itself is not a valid argument.
      $substitute = 'none';
      if ($replacementChar !== '') {
        $codePoint = self::ord($replacementChar);
        if ($codePoint > 0) {
          $substitute = $codePoint;
        }
      }

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // the substitute character is a global setting: always restore the previous value
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4394
4395
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. If omitted (or empty), every trailing
   *                      character of the Unicode categories "Z" (separator) and "C" (other) is stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a perfectly valid character to strip
    $useDefault = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefault) {
      $pattern = '[\pZ\pC]+$';
    } else {
      $quotedChars = \preg_quote((string)$chars, '/');
      $pattern = '[' . $quotedChars . ']+$';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4419
4420
  /**
   * Build a regex fragment which matches any of the characters in $s.
   *
   * Characters are collected into one bracket expression (prefixed by $class); pieces which
   * are at least three bytes long and longer than one character become separate alternatives
   * of a non-capturing group.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>The regex fragment (cached per "$s . $class").</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0: content of the bracket expression, index 1+: standalone alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // a literal "-" has to be the first char of the bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($piece[2])) {
        // shorter than three bytes
        $parts[0] .= \preg_quote($piece, '/');
        continue;
      }

      if (1 === self::strlen($piece)) {
        // one character of three or more bytes
        $parts[0] .= $piece;
        continue;
      }

      $parts[] = $piece;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === \count($parts))
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4468
4469
  /**
   * WARNING: Prints the detected native UTF-8 support (self::$SUPPORT), e.g. for debugging.
   *
   * Output format: one "key - value" line per entry, wrapped in a "<pre>" element.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    echo '<pre>';

    foreach (self::$SUPPORT as $feature => $state) {
      echo $feature, ' - ', \print_r($state, true), "\n<br>";
    }

    echo '</pre>';
  }
4484
4485
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    // ASCII input is passed through untouched, if requested
    $keepAsIs = $keepAsciiChars === true && self::is_ascii($char) === true;
    if ($keepAsIs) {
      return $char;
    }

    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4514
4515
  /**
   * Replace every run of $tabLength spaces by one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces which make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4525
4526
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regex; otherwise the bytes are
   * scanned manually and incomplete / malformed sequences are skipped.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ('' === $str) {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      $chars = $matches[0] ?? $chars;
      unset($matches);

    } else {

      // fallback

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // 0xxxxxxx: plain ASCII byte
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // the lead byte announces the width; the whole sequence must still fit into the string
        if (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {
          $width = 2; // 110xxxxx
        } elseif (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {
          $width = 3; // 1110xxxx
        } elseif (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {
          $width = 4; // 11110xxx
        } else {
          // stray continuation byte or truncated sequence: skip this byte
          continue;
        }

        // every following byte has to be a continuation byte (10xxxxxx)
        $sequence = $lead;
        $isValid = true;
        for ($offset = 1; $offset < $width; $offset++) {
          $next = $str[$i + $offset];
          if (($next & "\xC0") !== "\x80") {
            $isValid = false;
            break;
          }
          $sequence .= $next;
        }

        // a malformed sequence only skips its lead byte
        if ($isValid) {
          $chars[] = $sequence;
          $i += $width - 1;
        }
      }
    }

    if ($length > 1) {
      // glue "$length" characters together per element
      $chunks = \array_chunk($chars, $length);

      return \array_map(
          function ($chunk) {
            return \implode('', $chunk);
          },
          $chunks
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4643
4644
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are dropped without capitalizing what follows
    $camelized = (string)\preg_replace('/^[-_]+/', '', $camelized);

    // remove each separator run and upper-case the character right after it (if there is one)
    $upperAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };
    $camelized = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $camelized);

    // upper-case the character which directly follows a run of digits
    $upperAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };
    $camelized = (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $camelized);

    return $camelized;
  }
4681
4682
  /**
   * Capitalizes the first letter of every word of a name, leaving the words
   * that the name helper treats as special cases untouched.
   *
   * Whitespace is collapsed first; the helper is then applied once for
   * space-separated parts and once for dash-separated parts.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $normalized = self::collapse_whitespace($str);
    $bySpace = self::str_capitalize_name_helper($normalized, ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
4699
4700
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names on $delimiter and upper-cases the first letter of every part,
   * except for parts that
   *  - are exactly one of the known lowercase name particles ("von", "de", ...),
   *  - start with one of the known prefixes ("mc", "d'", ...), or
   *  - (only when the delimiter is "-") start with one of the name particles.
   *
   * @param string $names     <p>The (already whitespace-normalized) name string.</p>
   * @param string $delimiter <p>The separator between the name parts.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string The re-joined name, or an empty string for an empty delimiter.
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // explode() cannot split on an empty delimiter (warning + false on PHP 7,
    // ValueError on PHP 8), so keep the previous "empty string" result explicitly.
    if ($delimiter === '') {
      return '';
    }

    $namesArray = \explode($delimiter, $names);

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // Beginnings that keep a part lower-case: the prefixes always, the name
    // particles additionally when splitting on dashes.
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $protectedBeginnings = \array_merge($specialCases['names'], $specialCases['prefixes']);
    }

    foreach ($namesArray as &$name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $keepAsIs = false;
      foreach ($protectedBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $keepAsIs = true;
          break; // one hit is enough
        }
      }

      if ($keepAsIs) {
        continue;
      }

      $name = self::str_upper_first($name);
    }
    // break the reference so later writes to $name cannot alter the array
    unset($name);

    return \implode($delimiter, $namesArray);
  }
4788
4789
  /**
   * Tells whether $needle occurs anywhere inside $haystack.
   *
   * The lookup is case-sensitive by default; pass false as $caseSensitive
   * to ignore case.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4809
4810
  /**
   * Tells whether $haystack contains every one of the given $needles.
   *
   * An empty haystack or an empty needle list yields false. The lookup is
   * case-sensitive by default; pass false as $caseSensitive to ignore case.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || empty($needles)) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        // the first missing needle settles the answer
        return false;
      }
    }

    return true;
  }
4840
4841
  /**
   * Tells whether $haystack contains at least one of the given $needles.
   *
   * An empty needle list yields false. The lookup is case-sensitive by
   * default; pass false as $caseSensitive to ignore case.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (empty($needles)) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        // the first hit settles the answer
        return true;
      }
    }

    return false;
  }
4868
4869
  /**
   * Returns a lowercase, trimmed, dash-separated version of the string.
   *
   * This is str_delimit() with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
4883
4884
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * Uppercase detection is Unicode-aware, so e.g. "ÄpfelÖl" is split as well
   * as "fooBar".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // Mark every uppercase letter that is not at a word boundary with a dash.
    // \p{Lu} (instead of [A-Z]) also covers non-ASCII capitals.
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // Collapse each run of dashes / underscores / whitespace into one delimiter.
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
  }
4906
4907
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order:
   *  1. binary input: only UTF-16 / UTF-32 (LE / BE) are recognised, anything else is rejected
   *  2. pure ASCII
   *  3. valid UTF-8
   *  4. "mb_detect_encoding()" in strict mode with a fixed candidate list
   *  5. "iconv()" round-trip against the list loaded via getData('encodings')
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // each check runs once; 1 => little endian, 2 => big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    // always fallback via symfony polyfill
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() may fail; fall back to an empty list so the loop below
      // never iterates over a non-array value.
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      // A string that survives a same-encoding round-trip unchanged is
      // considered valid in that encoding.
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5025
5026
  /**
   * Tells whether $haystack ends with $needle (case-sensitive, byte-wise).
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    // take as many trailing bytes as the needle is long and compare them
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
5042
5043
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-sensitive
   * - an empty list yields false
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5067
5068
  /**
   * Guarantees that the string starts with $substring, prepending it
   * when str_starts_with() reports that it is missing.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring) ? $str : $substring . $str;
  }
5085
5086
  /**
   * Guarantees that the string ends with $substring, appending it
   * when str_ends_with() reports that it is missing.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring) ? $str : $str . $substring;
  }
5102
5103
  /**
   * Makes an identifier human readable: every "_id" is removed, remaining
   * underscores become spaces, the result is trimmed and its first
   * character is upper-cased.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    $search = ['_id', '_'];
    $replacement = ['', ' '];

    $readable = self::str_replace($search, $replacement, $str);

    return self::ucfirst(self::trim($readable));
  }
5127
5128
  /**
   * Tells whether $haystack ends with $needle, ignoring case.
   *
   * An empty haystack or an empty needle always yields false. The tail of
   * the haystack is cut byte-wise (as many bytes as the needle has) and
   * then compared via strcasecmp().
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5148
5149
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-insensitive
   * - an empty list yields false
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5173
5174
  /**
   * Finds the index of the first occurrence of $needle, ignoring case.
   *
   * Thin wrapper around stripos(); the search starts at $offset.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5196
5197
  /**
   * Finds the index of the last occurrence of $needle, ignoring case.
   *
   * Thin wrapper around strripos(); $offset is handed through unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5220
5221
  /**
   * Finds the index of the first occurrence of $needle (case-sensitive).
   *
   * Thin wrapper around strpos(); the search starts at $offset.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5243
5244
  /**
   * Finds the index of the last occurrence of $needle (case-sensitive).
   *
   * Thin wrapper around strrpos(); $offset is handed through unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5267
5268
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * When $index lies beyond the end of the string, the string is returned
   * unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    // strlen() may report false; normalise so substr() always receives an int length
    $len = (int)self::strlen($str, $encoding);

    if ($index > $len) {
      return $str;
    }

    // substr() may report false as well; treat that as "nothing on this side"
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5291
5292
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Implemented on top of preg_replace(): every search value is quoted and
   * turned into a case-insensitive UTF-8 pattern, and every replacement is
   * escaped so that "$" and "\" are inserted literally (no back-references),
   * exactly like the native function does.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search value never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), taken literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // "start of string preceded by a character" can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement;
    // a preceding backslash makes "$" and "\" literal again.
    $escape = function ($value) {
      return \addcslashes((string)$value, '\\$');
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

    return $subject;
  }
5335
5336
  /**
   * Tells whether $haystack starts with $needle, ignoring case.
   *
   * An empty haystack or an empty needle always yields false; otherwise the
   * needle has to be found by stripos() at index 0.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    return self::stripos($haystack, $needle) === 0;
  }
5356
5357
  /**
   * Tells whether the string begins with at least one of $substrings.
   *
   * - case-insensitive
   * - an empty string or an empty list yields false
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5385
5386
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when $str or $separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting below,
    // otherwise the offset and the substr() call could disagree
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5417
5418
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when $str or $separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting below,
    // otherwise the offset and the substr() call could disagree
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5449
5450
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when $str or $separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting below,
    // otherwise the offset and the substr() call could disagree
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5476
5477
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when $str or $separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting below,
    // otherwise the offset and the substr() call could disagree
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5503
5504
  /**
   * Returns the part of the string located via stristr() for the first
   * occurrence of "$needle" (the part before the needle when "$beforeNeedle"
   * is set).
   *
   * An empty string is returned when $str or $needle is empty, or when
   * stristr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
5536
5537
  /**
   * Returns the part of the string located via strrichr() for the last
   * occurrence of "$needle" (the part before the needle when "$beforeNeedle"
   * is set).
   *
   * An empty string is returned when $str or $needle is empty, or when
   * strrichr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
5564
5565
  /**
   * Returns the trailing $n characters of the string.
   *
   * A non-positive $n, or a failing substr() call, yields an empty string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // a negative start counts from the end of the string
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5584
5585
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than $length, it is cut so that the kept part plus
   * $strAddOn is $length characters long (as long as $strAddOn itself fits).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The original string if it is short enough, an empty string for
   *                empty input or a non-positive $length, otherwise the shortened
   *                string followed by $strAddOn.
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the input string, and never let
    // the kept length go negative (a negative substr length would cut from the
    // end of the string instead of limiting it).
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5611
5612
  /**
   * Limit the number of characters in a string, cutting at a word boundary
   * (a space) where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The character at the limit is a space: cut right before it.
    if (' ' === self::substr($str, $length - 1, 1, $encoding)) {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // Otherwise cut at the limit and drop the last space-separated chunk.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ('' !== $withoutLastWord) {
      return $withoutLastWord . $strAddOn;
    }

    // Nothing is left after dropping the last chunk: fall back to a hard cut.
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
5653
5654
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The shared leading characters, or an empty string if there are none.
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlap of both strings can be part of a common prefix.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = self::substr($str, $i, 1, $encoding);

      // Strict comparison: substr() may return false, which must never be
      // treated as equal to a character such as "0" nor be appended.
      if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
5680
5681
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The longest common substring, or an empty string if either input is empty.
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty (or its length could not be determined).
    if (!$strLength || !$otherLength) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per table cell.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // (1-based) end position of that match inside $str
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // The character of $str does not depend on $j, so fetch it once per row.
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar === $otherChars[$j]) {
          // Extend the common run that ended at the previous character pair.
          $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
          if ($table[$i][$j] > $len) {
            $len = $table[$i][$j];
            $end = $i;
          }
        } else {
          $table[$i][$j] = 0;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
5732
5733
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The shared trailing characters, or an empty string if there are none.
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlap of both strings can be part of a common suffix.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // Negative offsets walk backwards from the end of each string.
      $char = self::substr($str, -$i, 1, $encoding);

      // Strict comparison: substr() may return false, which must never be
      // treated as equal to a character such as "0" nor be concatenated.
      if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
5759
5760
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier,
   * so it must be given without delimiters.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against.</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match, 0 on no match and false on error.
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
5776
5777
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // Negative offsets reach back at most $length characters;
    // non-negative offsets must be strictly below the length.
    return $offset < 0
        ? $length >= \abs($offset)
        : $length > $offset;
  }
5800
5801
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Run the bounds check with the same encoding that is used for the lookup,
    // otherwise the check and char_at() may disagree about the string length.
    if (!self::str_offset_exists($str, $index, $encoding)) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
5830
5831
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string. An empty input string is returned as
   *                an empty string, and the input is returned unchanged if
   *                $pad_length is not positive, is smaller than the string length,
   *                or if $pad_string is empty.
   *
   * @throws \InvalidArgumentException If $pad_type is neither an integer nor one of the supported strings.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the string aliases into the native STR_PAD_* constants.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    // No padding requested or the string is already longer than requested.
    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // Nothing to pad with: bail out instead of dividing by zero below.
      return $str;
    }

    // Number of characters that have to be added in total.
    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        // Repeat the pad string often enough, then cut it to the exact size.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the smaller half, the right side the bigger one.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
5902
5903
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. Alias for pad() with a $padType of 'both'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $total = $length - self::strlen($str, $encoding);

    // With an odd amount of padding the right side gets the extra character.
    $left = (int)\floor($total / 2);
    $right = (int)\ceil($total / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
5920
5921
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Alias for pad() with a $padType of 'left'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding (as str_pad_both() does),
    // so that the amount of padding matches the encoding used to apply it.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
5936
5937
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Alias for pad() with a $padType of 'right'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding (as str_pad_both() does),
    // so that the amount of padding matches the encoding used to apply it.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
5952
5953
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
5977
5978
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // $count is handed through by reference, so the caller sees the native counter.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6011
6012
  /**
   * Replaces $search at the beginning of the string with $replacement.
   *
   * The search term is quoted and anchored with "^", then handed to self::regex_replace().
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // Literal search term, anchored at the start.
    $pattern = '^' . \preg_quote($search, '/');

    // Double the backslashes in the replacement before it is used in the regex replacement.
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $pattern, $escapedReplacement);
  }
6029
6030
  /**
   * Replaces $search at the ending of the string with $replacement.
   *
   * The search term is quoted and anchored with "$", then handed to self::regex_replace().
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // Literal search term, anchored at the end.
    $pattern = \preg_quote($search, '/') . '$';

    // Double the backslashes in the replacement before it is used in the regex replacement.
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $pattern, $escapedReplacement);
  }
6047
6048
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string The subject with the first occurrence replaced, or the unchanged
   *                subject if the term was not found.
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $pos = self::strpos($subject, $search);

    // Term not found: nothing to replace.
    if ($pos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
6067
6068
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string The subject with the last occurrence replaced, or the unchanged
   *                subject if the term was not found.
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $pos = self::strrpos($subject, $search);

    // Term not found: nothing to replace.
    if ($pos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
6087
6088
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // Without this guard an empty string would produce range(0, -1) === [0, -1]
    // and two pointless out-of-range substr() calls.
    if ('' === $str) {
      return '';
    }

    $indexes = \range(0, (int)self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // Re-assemble the string character by character in the shuffled order.
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6110
6111
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    if ($end === null) {
      // No end given: take everything from $start on.
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // The end lies at or before the start: empty slice.
      return '';
    } elseif ($end < 0) {
      // A negative end is counted from the end of the string. The length is
      // measured in the requested encoding, like the extraction below.
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6140
6141
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become "_", every ASCII upper-case letter is
   * lower-cased and prefixed with "_", ASCII digits are surrounded by "_",
   * repeated "_" are collapsed and leading / trailing "_" are removed.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    // NOTE: inside a character class "|" is a literal pipe, not an alternation,
    // so it must not be part of the class (otherwise "|" is rewritten to "_|").
    $str = (string)\preg_replace_callback(
        '/([\dA-Z])/u',
        static function ($matches) use ($encoding) {
          $match = $matches[1];
          $matchInt = (int)$match;

          // The match is a plain digit: separate it on both sides.
          if ((string)$matchInt === $match) {
            return '_' . $match . '_';
          }

          // Otherwise start a new word with the lower-cased character.
          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6188
6189
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice drops duplicate values.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6214
6215
  /**
   * Split a string into an array.
   *
   * The string is split with the GRAPHEME_CLUSTER_RX pattern and the matches are
   * grouped into chunks of $len. An array input is processed entry by entry.
   *
   * @param string|string[] $str <p>The input string (or an array of strings).</p>
   * @param int             $len <p>Number of matched units per chunk.</p>
   *
   * @return string[] The chunks; an empty array for an empty string or a non-positive $len.
   */
  public static function str_split($str, int $len = 1): array
  {
    if ($len <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      // Split every entry on its own and keep the original keys.
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue every $len consecutive matches together.
    $chunks = [];
    foreach (\array_chunk($clusters, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
6263
6264
  /**
   * Splits the string at every occurrence of $pattern, returning an array of
   * strings. An optional integer $limit will truncate the results.
   *
   * NOTE: $pattern is passed through preg_quote(), so it is matched literally.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>The separator with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // An empty pattern cannot split anything.
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the unsplit remainder into the last element when a limit
    // is given, so ask for one element more and drop the remainder afterwards.
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
    if ($parts === false) {
      return [];
    }

    if ($splitLimit > 0 && \count($parts) === $splitLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6307
6308
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    return '' !== $haystack
           && '' !== $needle
           && 0 === \strpos($haystack, $needle);
  }
6328
6329
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with $substring.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing to check against.
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6357
6358
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string The part behind the separator, or an empty string if $str or
   *                $separator is empty or the separator was not found.
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $offset = self::str_index_first($str, $separator);
    if ($offset === false) {
      return '';
    }

    // Skip the separator itself and take everything behind it.
    $start = $offset + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6389
6390
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string The part after the last separator, or an empty string if
   *                either argument is empty or the separator is not found.
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the same encoding that is used for the
    // length / substring calculation below, so the offsets agree.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6421
6422
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string The part before the separator, or an empty string if either
   *                argument is empty or the separator is not found.
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the same encoding that the substring call
    // below uses, so the offset is interpreted consistently.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6453
6454
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string The part before the last separator, or an empty string if
   *                either argument is empty or the separator is not found.
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the same encoding that the substring call
    // below uses, so the offset is interpreted consistently.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6485
6486
  /**
   * Returns the part of the string starting at the first occurrence of "$needle"
   * (or the part before it when "$beforeNeedle" is true).
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string The matching part, or an empty string if either argument is
   *                empty or the lookup reports no match.
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $result = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return false === $result ? '' : $result;
  }
6518
6519
  /**
   * Returns the part of the string starting at the last occurrence of "$needle"
   * (or the part before it when "$beforeNeedle" is true).
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string The matching part, or an empty string if either argument is
   *                empty or the lookup reports no match.
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $result = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return false === $result ? '' : $result;
  }
6546
6547
  /**
   * Wraps $str in the given substring.
   *
   * @param string $str
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string The input with $substring prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6559
6560
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        static function (array $match) use ($encoding, $ignore): string {
          // Words on the ignore list are passed through untouched (exact, case-sensitive match).
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Use the caller's encoding for both steps, not only for the lower-casing.
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6589
6590
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize. The entries are inserted
   *                         into the regular expressions as-is (like the built-in small words).</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // Built-in "small words" (regex fragments) plus the caller-supplied ones.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // An input without any lowercase character is lower-cased first, so the
    // substitutions below can re-capitalize it; honor the caller's encoding.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        static function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        static function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        static function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        static function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in the pattern below tests for an ellipsis
    // character ("…") although its inline comment talks about a hyphen; the
    // pattern is intentionally left unchanged here - confirm the intent before
    // altering it, since it changes which compounds are capitalized.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        static function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6734
6735
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2 without leading zeros ("0" for an empty / all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string A string consisting of "0" and "1" characters.
   */
  public static function str_to_binary(string $str): string
  {
    if ('' === $str) {
      return '0';
    }

    // Convert byte by byte instead of via base_convert(): base_convert() goes
    // through a native number and silently loses precision on longer inputs.
    $bytes = \unpack('C*', $str);
    if ($bytes === false) {
      return '0';
    }

    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= \str_pad(\decbin($byte), 8, '0', \STR_PAD_LEFT);
    }

    // Keep the historic output format: no leading zeros, but never an empty string.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
6748
6749
  /**
   * Splits a string into lines.
   *
   * The split pattern consumes one or two consecutive CR / LF characters per break.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Result used for an empty input and for a failed split.
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ('' === $str) {
      return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    // Nothing to filter: hand back the raw split result.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
6784
6785
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the delimiters captured, so the result alternates
   * between non-word parts (even indexes) and words (odd indexes).
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Result used for an empty input and for a failed split.
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // Nothing to filter: hand back the raw split result.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    // Normalize every remaining entry to a string, keeping the keys.
    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
6829
6830
  /**
   * Alias of "UTF8::to_ascii()": forwards all arguments unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6845
6846
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // Already short enough: nothing to cut, nothing to append.
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Need to further trim the string so we can append the substring
    $length -= (int)self::strlen($substring, $encoding);

    // substr() may report a failure as false; treat that explicitly as "nothing
    // kept" instead of relying on implicit bool-to-string conversion.
    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      $truncated = '';
    }

    return $truncated . $substring;
  }
6879
6880
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // Already short enough: nothing to cut, nothing to append.
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // need to further trim the string so we can append the substring
    $length -= (int)self::strlen($substring, $encoding);

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // if the last word was truncated
    // (strict comparison: "no space found" (false) must not be confused with position 0)
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace !== $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      if ($lastPos !== false || $strPosSpace !== false) {
        $truncated = self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    // The second substr() call may also report false; normalize before concatenating.
    if ($truncated === false) {
      $truncated = '';
    }

    return $truncated . $substring;
  }
6923
6924
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * Implemented by delegating to "UTF8::str_delimit()" with "_" as delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
6938
6939
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * Implemented as "UTF8::str_camelize()" followed by "UTF8::str_upper_first()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
6953
6954
  /**
   * Alias of "UTF8::ucfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
6969
6970
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The words are taken from "UTF8::str_to_words()", whose result alternates
   * between non-word parts (even indexes) and words (odd indexes).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // Plain list of the words (every odd index).
      $words = [];
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      // Words keyed by their offset: start after the leading non-word part and
      // advance by the word plus the non-word part that follows it.
      $wordsByOffset = [];
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $wordsByOffset[$position] = $parts[$idx];
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $wordsByOffset;
    }

    return (int)(($partCount - 1) / 2);
  }
7013
7014
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both inputs are passed
   *       through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
7031
7032
  /**
   * Alias of "UTF8::strstr()": forwards all arguments unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7049
7050
  /**
   * Case-sensitive string comparison.
   *
   * Unless the inputs are byte-identical, both are normalized to NFD before
   * the byte-wise comparison. An input that cannot be normalized is compared
   * in its original form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    // Fast path: identical byte sequences need no normalization.
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalizer::normalize() reports failure as false, which \strcmp() would
    // reject under strict types - fall back to the raw input.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7069
7070
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string   $str
   * @param string   $charList <p>The characters that end the initial segment.</p>
   * @param int      $offset   [optional] <p>Start position; applied via UTF8::substr().</p>
   * @param int|null $length   [optional] <p>Length of the part to inspect; applied via UTF8::substr().</p>
   *
   * @return int|null The length of the initial segment, or null if $charList is
   *                  empty, the inspected part is empty or a helper call fails.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // Use a dedicated variable for the match result instead of overwriting the
    // integer $length parameter with an array.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      // Group 1 holds everything before the first character from $charList.
      $segmentLength = self::strlen($matches[1]);
    } else {
      // No character from $charList found: the whole string is the segment.
      $segmentLength = self::strlen($str);
    }

    // Keep to the documented "int|null" contract if the length lookup fails.
    return $segmentLength === false ? null : $segmentLength;
  }
7104
7105
  /**
   * Alias of "UTF8::stristr()": forwards all arguments unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7122
7123
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Each entry is converted with "UTF8::chr()" and the results are concatenated
   * in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
7145
7146
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is tested against every key of the internal BOM table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // Only the BOM strings (the keys) matter here, not their byte lengths.
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7165
7166
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // Optionally clean the input before the tags are removed.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7198
7199
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str
   *
   * @return string The input with every run of whitespace removed.
   */
  public static function strip_whitespace(string $str): string
  {
    return '' === $str
        ? ''
        : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
7216
7217
  /**
7218
   * Finds position of first occurrence of a string within another, case insensitive.
7219
   *
7220
   * @link http://php.net/manual/en/function.mb-stripos.php
7221
   *
7222
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7223
   * @param string $needle    <p>The string to find in haystack.</p>
7224
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
7225
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7226
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7227
   *
7228
   * @return int|false
7229
   *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
7230
   *                   haystack string,<br> or <strong>false</strong> if needle is not found.
7231
   */
7232 71
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // Only normalize encoding names that are not already in canonical form.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Lazily detect the available extensions.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 1st choice: mbstring. A "false" result falls through to the next strategy.
    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // 2nd choice: intl. "grapheme_stripos()" can neither handle other
    // encodings than UTF-8 nor a negative offset.
    $graphemeUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($graphemeUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // 3rd choice: plain ASCII input can use the native byte-wise function.
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    // 4th choice: "mb_stripos()" as provided by the symfony polyfill.
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    // Last resort (vanilla php): upper-case both strings and search case-sensitively.
    $haystackUpper = self::strtoupper($haystack, $encoding, false, null, true);
    $needleUpper = self::strtoupper($needle, $encoding, false, null, true);

    return self::strpos($haystackUpper, $needleUpper, $offset, $encoding);
  }
7290
7291
  /**
7292
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
7293
   *
7294
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
7295
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
7296
   * @param bool   $before_needle  [optional] <p>
7297
   *                               If <b>TRUE</b>, it returns the part of the
7298
   *                               haystack before the first occurrence of the needle (excluding the needle).
7299
   *                               </p>
7300
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
7301
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7302
   *
7303
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
7304
   */
7305 19
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle; in that case the haystack is
    // returned as-is. The comparison is strict on purpose: a loose check
    // ("!$needle") would also treat the valid needle "0" as empty.
    if ('' === $needle) {
      return $haystack;
    }

    // Lazily detect the available extensions.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl. A "false" result falls through to the next strategy.
    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // 3rd choice: plain ASCII input can use the native byte-wise function.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // Last resort (vanilla php): lazily capture everything in front of the
    // first case-insensitive occurrence of the needle.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // self::strlen() is documented to return false in some situations; never
    // forward that to the int-typed $offset parameter of self::substr().
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
7369
7370
  /**
7371
   * Get the string length, not the byte-length!
7372
   *
7373
   * @link     http://php.net/manual/en/function.mb-strlen.php
7374
   *
7375
   * @param string $str       <p>The string being checked for length.</p>
7376
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7377
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7378
   *
7379
   * @return int|false
7380
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
7381
   *             (One multi-byte character counted as +1).
7382
   *             <br>
7383
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
7384
   */
7385 380
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    // Only normalize encoding names that are not already in canonical form.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Lazily detect the available extensions.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // For these two encodings the byte length is returned directly.
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // Non-UTF-8 encodings without mbstring: warn if iconv is missing as well,
    // otherwise let iconv try first.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    // 1st choice: mbstring. A "false" result falls through to the next strategy.
    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // 2nd choice: iconv.
    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // 3rd choice: intl. "grapheme_strlen()" can't handle other encodings than UTF-8.
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    // 4th choice: "mb_strlen()" as provided by the symfony polyfill.
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // 5th choice: for plain ASCII the byte length equals the character length.
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // Last resort (vanilla php): count the characters matched by a UTF-8 aware regex.
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);

    // No match at all although the string is not empty: report failure.
    if ($length === 0 && isset($str[0])) {
      return false;
    }

    return $length;
  }
7480
7481
  /**
7482
   * Get string length in byte.
7483
   *
7484
   * @param string $str
7485
   *
7486
   * @return int
7487
   */
7488 108
  public static function strlen_in_byte(string $str): int
  {
    // Lazily detect the available extensions, exactly like the other public
    // methods of this class do before reading self::$SUPPORT. Without this
    // guard the lookup below depends on some other method having run first.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    // Without function overloading the native strlen() is used.
    return \strlen($str);
  }
7497
7498
  /**
7499
   * Case insensitive string comparisons using a "natural order" algorithm.
7500
   *
7501
   * INFO: natural order version of UTF8::strcasecmp()
7502
   *
7503
   * @param string $str1 <p>The first string.</p>
7504
   * @param string $str2 <p>The second string.</p>
7505
   *
7506
   * @return int
7507
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
7508
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
7509
   *             <strong>0</strong> if they are equal
7510
   */
7511 1
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands first, then compare them in natural order.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
7515
7516
  /**
7517
   * String comparisons using a "natural order" algorithm
7518
   *
7519
   * INFO: natural order version of UTF8::strcmp()
7520
   *
7521
   * @link  http://php.net/manual/en/function.strnatcmp.php
7522
   *
7523
   * @param string $str1 <p>The first string.</p>
7524
   * @param string $str2 <p>The second string.</p>
7525
   *
7526
   * @return int
7527
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
7528
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
7529
   *             <strong>0</strong> if they are equal
7530
   */
7531 2
  public static function strnatcmp(string $str1, string $str2): int
  {
    // Identical strings are equal by definition; skip the folding work.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the strtonatfold()-ed forms with PHP's natural order algorithm.
    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7535
7536
  /**
7537
   * Case-insensitive string comparison of the first n characters.
7538
   *
7539
   * @link  http://php.net/manual/en/function.strncasecmp.php
7540
   *
7541
   * @param string $str1 <p>The first string.</p>
7542
   * @param string $str2 <p>The second string.</p>
7543
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
7544
   *
7545
   * @return int
7546
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
7547
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
7548
   *             <strong>0</strong> if they are equal
7549
   */
7550 1
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // Case-fold both operands first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
7554
7555
  /**
7556
   * String comparison of the first n characters.
7557
   *
7558
   * @link  http://php.net/manual/en/function.strncmp.php
7559
   *
7560
   * @param string $str1 <p>The first string.</p>
7561
   * @param string $str2 <p>The second string.</p>
7562
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
7563
   *
7564
   * @return int
7565
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
7566
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
7567
   *             <strong>0</strong> if they are equal
7568
   */
7569 2
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // Cut both operands down to their first $len characters; the cast turns
    // a non-string result of self::substr() into a string.
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
7576
7577
  /**
7578
   * Search a string for any of a set of characters.
7579
   *
7580
   * @link  http://php.net/manual/en/function.strpbrk.php
7581
   *
7582
   * @param string $haystack  <p>The string where char_list is looked for.</p>
7583
   * @param string $char_list <p>This parameter is case sensitive.</p>
7584
   *
7585
   * @return string|false String starting from the character found, or false if it is not found.
7586
   */
7587 1
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Build the search pattern from the character list via self::rxClass().
    $pattern = '/' . self::rxClass($char_list) . '/us';

    if (!\preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // Return the haystack starting at the first occurrence of the matched text.
    $start = (int)\strpos($haystack, $found[0]);

    return \substr($haystack, $start);
  }
7599
7600
  /**
7601
   * Find position of first occurrence of string in a string.
7602
   *
7603
   * @link http://php.net/manual/en/function.mb-strpos.php
7604
   *
7605
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7606
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
7607
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
7608
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7609
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7610
   *
7611
   * @return int|false
7612
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
7613
   *                   string.<br> If needle is not found it returns false.
7614
   */
7615 197
  public static function strpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle:
    // a non-negative int is treated as a code point and converted to its character.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Lazily detect the available extensions.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // For "CP850" the native function is used directly (unless it is overloaded by mbstring).
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Non-UTF-8 encodings without mbstring: let iconv try first.
    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // A "false" result of any strategy below falls through to the next one.
    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the haystack. Translate it
    // into an absolute character offset *before* slicing, so the position
    // returned below is relative to the start of the full haystack and not
    // to the start of the tail. Offsets reaching before the start of the
    // string are clamped to 0, as is an unknown (false) length.
    if ($offset < 0) {
      $haystackLength = $haystackIsAscii ? \strlen($haystack) : self::strlen($haystack, $encoding);
      $offset = \max(0, (int)$haystackLength + $offset);
    }

    // Search only in the part of the haystack that starts at $offset.
    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos) {
      // $pos was found by the native function: measure the text in front of
      // the match in characters and add the skipped offset. A false length
      // counts as 0.
      return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
    }

    // The needle starts right at the offset.
    return $offset;
  }
7751
7752
  /**
7753
   * Finds the last occurrence of a character in a string within another.
7754
   *
7755
   * @link http://php.net/manual/en/function.mb-strrchr.php
7756
   *
7757
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
7758
   * @param string $needle        <p>The string to find in haystack</p>
7759
   * @param bool   $before_needle [optional] <p>
7760
   *                              Determines which portion of haystack
7761
   *                              this function returns.
7762
   *                              If set to true, it returns all of haystack
7763
   *                              from the beginning to the last occurrence of needle.
7764
   *                              If set to false, it returns all of haystack
7765
   *                              from the last occurrence of needle to the end,
7766
   *                              </p>
7767
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
7768
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
7769
   *
7770
   * @return string|false The portion of haystack or false if needle is not found.
7771
   */
7772 3
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only normalize encoding names that are not already in canonical form.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Lazily detect the available extensions.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: the native function, used only for "CP850" / "ASCII"
    // and when the part starting at the needle is requested.
    $isSingleByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($before_needle === false && $isSingleByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    // 3rd choice: "mb_strrchr()" as provided by the symfony polyfill.
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // Last resort (vanilla php): search for the first character of the needle only.
    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }

    $lastPos = self::strrpos($haystack, (string)$firstChar, null, $encoding);
    if ($lastPos === false) {
      return false;
    }

    // Either everything in front of the last occurrence, or everything from it onwards.
    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
7833
7834
  /**
7835
   * Reverses characters order in the string.
7836
   *
7837
   * @param string $str <p>The input string.</p>
7838
   *
7839
   * @return string The string with characters in the reverse sequence.
7840
   */
7841 9
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // self::strlen() is documented to return int|false. Normalize the result
    // to int so the loop counter below is never a bool: decrementing a bool
    // is a no-op, so a failed length lookup simply yields an empty result.
    $i = (int)self::strlen($str);

    $reversed = '';
    while ($i--) {
      // Append the characters one by one, walking from the last to the first.
      // The cast makes the "false" => "" conversion explicit.
      $reversed .= (string)self::substr($str, $i, 1);
    }

    return $reversed;
  }
7855
7856
  /**
7857
   * Finds the last occurrence of a character in a string within another, case insensitive.
7858
   *
7859
   * @link http://php.net/manual/en/function.mb-strrichr.php
7860
   *
7861
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
7862
   * @param string $needle         <p>The string to find in haystack.</p>
7863
   * @param bool   $before_needle  [optional] <p>
7864
   *                               Determines which portion of haystack
7865
   *                               this function returns.
7866
   *                               If set to true, it returns all of haystack
7867
   *                               from the beginning to the last occurrence of needle.
7868
   *                               If set to false, it returns all of haystack
7869
   *                               from the last occurrence of needle to the end,
7870
   *                               </p>
7871
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
7872
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7873
   *
7874
   * @return string|false The portion of haystack or<br>false if needle is not found.
7875
   */
7876 2
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only normalize encoding names that are not already in canonical form.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // No hand-written fallback here: "mb_strrichr()" is always called
    // (always fallback via symfony polyfill).
    return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
  }
7892
7893
  /**
7894
   * Find position of last occurrence of a case-insensitive string.
7895
   *
7896
   * @param string     $haystack  <p>The string to look in.</p>
7897
   * @param string|int $needle    <p>The string to look for.</p>
7898
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
7899
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7900
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7901
   *
7902
   * @return int|false
7903
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
7904
   *                   string.<br>If needle is not found, it returns false.
7905
   */
7906 3
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support an integer $needle:
    // a non-negative int is treated as a code point and converted to its character.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Only normalize encoding names that are not already in canonical form.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Lazily detect the available extensions.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 2nd choice: intl. "grapheme_strripos()" can neither handle other
    // encodings than UTF-8 nor a negative offset. A "false" result falls through.
    $graphemeUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($graphemeUsable) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // 3rd choice: "mb_strripos()" as provided by the symfony polyfill.
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 4th choice: plain ASCII input can use the native byte-wise function.
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \strripos($haystack, $needle, $offset);
    }

    // Last resort (vanilla php): upper-case both strings and search case-sensitively.
    $haystackUpper = self::strtoupper($haystack, $encoding, false, null, true);
    $needleUpper = self::strtoupper($needle, $encoding, false, null, true);

    return self::strrpos($haystackUpper, $needleUpper, $offset, $encoding, $cleanUtf8);
  }
7978
7979
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <strong>null</strong> is treated like 0 by the
   *                              "mb_strrpos()" based code paths.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // "mb_strrpos()" expects an int offset: with strict_types a null offset is rejected on PHP >= 8,
      // so the "not given" default is normalized to 0 (which is how older versions treated it anyway)
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      // same null-normalization as for the native "mb_strrpos()" call above
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    // fallback for ascii only
    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return \strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // search only in the part after the offset, the offset is added again to the result below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // negative offset: cut the tail of the haystack, positions then count from the real start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position by counting the characters in front of the match
    return (int)$offset + (int)self::strlen(\substr($haystack, 0, $pos));
  }
8094
8095
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start position of the examined part of the string.</p>
   * @param int|null $length [optional] <p>Length of the examined part of the string.</p>
   *
   * @return int
   *             The length of the leading segment, or 0 if the string / mask is empty or nothing matches.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      // only look at the requested part of the string
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the input string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // "self::strlen()" may return false, but this method is declared to return an int
    return (int)self::strlen($matches[0]);
  }
8122
8123
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found
   *                       (or if haystack / needle is an empty string).
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstringSupport = self::$SUPPORT['mbstring'];

    // preferred way: mbstring
    if ($mbstringSupport === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if ($mbstringSupport === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    // last resort: capture (non-greedy) everything in front of the first occurrence of the needle
    $found = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $partBeforeNeedle = $found[1];

    return $before_needle
        ? $partBeforeNeedle
        : self::substr($haystack, self::strlen($partBeforeNeedle));
  }
8194
8195
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The folded and lower-cased string.
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    // replacement tables are built / loaded only once per request
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ('' === $str) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    // step 1: the common (limited) case folding
    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2: the full case folding, index 0 holds the search values and index 1 the replacements
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // step 3: lower-case whatever is left
    return self::strtolower($str);
  }
8241
8242
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to the case conversion
      $str = self::clean($str);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    // no special language requested: always fallback via symfony polyfill
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific rules are only available via the intl transliterator
    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the generic transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
8304
8305
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (Unicode normalization form "NFD") and afterwards all
   * non-spacing marks (\p{Mn}) are removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   *                The folded string, or an empty string if the input could not be normalized.
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
      // normalization failed, there is nothing that could be passed on to "preg_replace()"
      return '';
    }

    // "preg_replace()" returns null on error, but this method is declared to return a string
    return (string)\preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
8317
8318
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language is unknown to the transliterator
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8380
8381
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to. If omitted, $from is either used as
   *                              replace-pairs (array) or removed from the string (string).</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // nothing would change
    if ($from === $to) {
      return $str;
    }

    // two-argument form: no "$to" was given
    if (INF === $to) {
      return \is_string($from)
          ? \str_replace($from, '', $str)
          : \strtr($str, $from);
    }

    // three-argument form: map the chars of "$from" onto the chars of "$to"
    $searchChars = self::str_split($from);
    $replaceChars = self::str_split($to);
    $searchCount = \count($searchChars);
    $replaceCount = \count($replaceChars);

    // surplus chars of the longer list are ignored
    if ($searchCount > $replaceCount) {
      $searchChars = \array_slice($searchChars, 0, $replaceCount);
    } elseif ($searchCount < $replaceCount) {
      $replaceChars = \array_slice($replaceChars, 0, $searchCount);
    }

    return \strtr($str, \array_combine($searchChars, $replaceChars));
  }
8425
8426
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int The width as reported by "mb_strwidth()".
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // always fallback via symfony polyfill
    return \mb_strwidth($str, $encoding);
  }
8450
8451
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string   $str       <p>The string being checked.</p>
   * @param int      $offset    <p>The first position used in str.</p>
   * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty string: nothing to cut from, or nothing requested
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the length of the string is only needed if an offset is used or if no length was given
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($str_length === false) {
      return false;
    }

    // Empty string: start at the very end, without a length
    // (a zero length was already handled above, so "no length" means null at this point)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible: start behind the end of the string
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = (int)$str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is handled via the byte-wise "substr()", as long as it is not overloaded by mbstring
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return \substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_substr() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $viaIntl = \grapheme_substr($str, $offset, $length);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $viaIconv = \iconv_substr($str, $offset, $length);
      if ($viaIconv !== false) {
        return $viaIconv;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($chars, $offset, $length));
  }
8585
8586
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      // cut the requested part out of the main string ...
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // ... and shorten the secondary string to the same number of characters
      // INFO: "self::strlen()" may return false, but "self::substr()" only accepts an int (or null) as length
      $str2Tmp = self::substr($str2, 0, (int)self::strlen($str1));
      if ($str2Tmp === false) {
        $str2Tmp = '';
      }
      $str2 = (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
8629
8630
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The number of occurrences, or false if haystack / needle is an empty string.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // reduce the haystack to the requested window first
    if ($offset !== 0 || $length !== null) {

      if ($length === null) {
        $haystackLength = self::strlen($haystack);
        if ($haystackLength === false) {
          return false;
        }
        $length = (int)$haystackLength;
      }

      $windowIsEmpty = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
      if (
          $windowIsEmpty
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = $window === false ? '' : (string)$window;
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstringSupport = self::$SUPPORT['mbstring'];

    // preferred way: mbstring
    if ($mbstringSupport === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    if ($mbstringSupport === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // fallback: count the matches of the quoted needle
    $allMatches = [];
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $allMatches, PREG_SET_ORDER);

    return \count($allMatches);
  }
8723
8724
  /**
   * Returns the number of occurrences of $substring in the given string.
   * By default, the comparison is case-sensitive, but can be made insensitive
   * by setting $caseSensitive to false.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences (a "false" result of "UTF8::substr_count()" is reported as 0).
   */
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($caseSensitive) {
      return (int)self::substr_count($str, $substring, 0, null, $encoding);
    }

    // case-insensitive: compare the upper-cased variants of both strings
    return (int)self::substr_count(
        self::strtoupper($str, $encoding),
        self::strtoupper($substring, $encoding),
        0,
        null,
        $encoding
    );
  }
8745
8746
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack if it does not start with the needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // INFO: "self::strlen()" may return false, but "self::substr()" only accepts an int as offset
      $haystackTmp = self::substr($haystack, (int)self::strlen($needle));
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
8774
8775
  /**
   * Get part of a string process in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string, <strong>null</strong> means
   *                         "up to the end of the string".</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }

    if ($length === null) {
      // "substr()" accepts null as length only as of PHP 8.0,
      // leaving the argument out means "up to the end of the string" on every version
      return \substr($str, $offset);
    }

    return \substr($str, $offset, $length);
  }
8796
8797
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    // only touch the string when it really ends with the needle (case-insensitive check)
    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // self::strlen() can report false; in the subtraction below a false would silently
    // count as 0 and cut the haystack at the wrong position, so bail out unchanged.
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
8825
8826
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack
   *                if it does not start with the needle.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    // only touch the string when it really starts with the needle
    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // self::strlen() can report false; handing that to self::substr() as the offset
    // would be a type error under strict_types, so the input is left unchanged instead.
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
8854
8855
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement: one entry per input string
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset: non-integer entries are replaced by 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      // NOTE: in this array branch a missing length is expanded to 0 for every element,
      // which differs from the single-string branch below where null means "to the end".
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, element by element
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // single input string: only the first replacement is used
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // pure ASCII input: byte offsets and character offsets are the same
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      // "replace up to the end": the number of split characters is exactly the length
      // array_splice() needs, and (unlike a string-length lookup) it can never be false,
      // so the documented return type is kept.
      $length = \count($smatches[0]);
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
8960
8961
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    // only touch the string when it really ends with the needle
    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // self::strlen() can report false; in the subtraction below a false would silently
    // count as 0 and cut the haystack at the wrong position, so bail out unchanged.
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
8989
8990
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // Fast path: "lower XOR upper XOR original" flips the case byte by byte. That is only
    // reliable while all three strings have the same byte length, otherwise the bytes
    // after a length-changing character no longer line up.
    $byteLength = self::strlen_in_byte($str);
    if (
        self::strlen_in_byte($lower) === $byteLength
        &&
        self::strlen_in_byte($upper) === $byteLength
    ) {
      return (string)($lower ^ $upper ^ $str);
    }

    // Slow path: a case mapping changed the byte length of at least one character,
    // so swap character by character.
    $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
      // the input could not be split as UTF-8: keep the historical result
      return (string)($lower ^ $upper ^ $str);
    }

    $swapped = '';
    foreach ($chars as $char) {
      $charLower = self::strtolower($char, $encoding);
      // already lower case (or caseless) => take the upper-case form, otherwise the lower-case form
      $swapped .= ($charLower === $char) ? self::strtoupper($char, $encoding) : $charLower;
    }

    return $swapped;
  }
9017
9018
  /**
   * Checks whether "mbstring" or "iconv" functions are provided by a polyfill,
   * i.e. the function exists although the corresponding extension is not loaded.
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" exists without the "mbstring" extension or
   *              "iconv()" exists without the "iconv" extension, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringIsPolyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
    $iconvIsPolyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

    return $mbstringIsPolyfilled || $iconvIsPolyfilled;
  }
9041
9042
  /**
   * Replaces every tab character in the string with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces used for one tab.</p>
   *
   * @return string <p>The string with all tabs replaced.</p>
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9052
9053
  /**
   * Title-cases the string via "mb_convert_case()" with MB_CASE_TITLE.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "UTF-8" and "CP850" are passed through as given, anything else is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9071
9072
  /**
   * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9089
9090
  /**
   * Deprecated alias: forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9105
9106
  /**
   * Deprecated alias: forwards the argument unchanged to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9121
9122
  /**
   * Deprecated alias: forwards the argument unchanged to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9137
9138
  /**
   * Convert a string into ASCII.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // per-request cache of the transliteration tables, indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // the transliterator reports failure with false: keep the current string in that
        // case and let the table based fallback below handle it
        if (\is_string($transliterated) === true) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split into single characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // reset per character, so a code point decoded for a previous character
      // can never be reused for the current one
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // decode the code point from the lead byte and its following bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded for this lead byte
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the table for this bank on first use; a missing table is cached as empty
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no entry in the table for this character
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return \implode('', $chars);
  }
9299
9300
  /**
   * Interprets a value as boolean, based on its string representation.
   *
   * Recognized keywords (case-insensitive for ASCII letters): "true", "1", "on", "yes" => true and
   * "false", "0", "off", "no" => false. Other numeric strings are true if greater than zero.
   * Everything else is true if something is left after trimming.
   *
   * Info: http://php.net/manual/en/filter.filters.validate.php
   *
   * @param mixed $str
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;

    if ('' === $value) {
      return false;
    }

    $keyword = \strtolower($value);

    if (\in_array($keyword, ['true', '1', 'on', 'yes'], true) === true) {
      return true;
    }

    if (\in_array($keyword, ['false', '0', 'off', 'no'], true) === true) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      return (((float)$value + 0) > 0);
    }

    return (bool)self::trim($value);
  }
9339
9340
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (keys are kept), everything else is cast
   * to string and passed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // with a single input array, array_map() keeps the original keys
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $input = (string)$str;

    return ('' === $input) ? '' : self::utf8_decode($input);
  }
9364
9365
  /**
   * Alias: forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9378
9379
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';

    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      if ($lead >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many following bytes a sequence with this lead byte would have
        if ($lead <= "\xDF") {
          $expected = 1; // looks like 2 bytes UTF8
        } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
          $expected = 2; // looks like 3 bytes UTF8
        } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
          $expected = 3; // looks like 4 bytes UTF8
        } else {
          $expected = 0; // doesn't look like UTF8, but should be converted
        }

        // collect the following bytes, all of them must be in the range 0x80 - 0xBF
        $tail = '';
        $looksLikeUtf8 = $expected > 0;
        for ($step = 1; $step <= $expected; $step++) {
          $next = $pos + $step >= $byteCount ? "\x00" : $str[$pos + $step];
          if ($next < "\x80" || $next > "\xBF") {
            $looksLikeUtf8 = false;
            break;
          }
          $tail .= $next;
        }

        if ($looksLikeUtf8 === true) { // yeah, almost sure it's UTF8 already
          $result .= $lead . $tail;
          $pos += $expected;
        } else { // not valid UTF8 - convert it
          $result .= self::to_utf8_convert_helper($lead);
        }

        continue;
      }

      if (($lead & "\xC0") === "\x80") { // needs conversion
        $result .= self::to_utf8_convert_helper($lead);
      } else { // it doesn't need conversion
        $result .= $lead;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
9488
9489
  /**
   * Converts one input byte into its UTF-8 representation: via the "win1252_to_utf8" table
   * when it has an entry for the byte value, otherwise as a two byte sequence built from the byte.
   *
   * @param int|string $input <p>The byte to convert, used as key into the "ord" lookup table.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // init
    $buf = '';

    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      $buf .= self::$WIN1252_TO_UTF8[$ordC1];
    } else {
      // ">> 6" is the integer form of "/ 64": a plain division can yield a float, which is
      // then implicitly truncated when used as array key (deprecated since PHP 8.1)
      $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
      // string bitwise operators on purpose: low six bits of the byte, marked as following byte
      $cc2 = ((string)$input & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    return $buf;
  }
9522
9523
  /**
9524
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
9525
   *
9526
   * INFO: This is slower than "trim()"
9527
   *
9528
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
9529
   * but the check for ASCII (7-Bit) costs more time than we can save here.
9530
   *
9531
   * @param string $str   <p>The string to be trimmed</p>
9532
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
9533
   *
9534
   * @return string The trimmed string.
9535
   */
9536 197
  public static function trim(string $str = '', $chars = INF): string
9537
  {
9538 197
    if ('' === $str) {
9539 7
      return '';
9540
    }
9541
9542
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
9543 192
    if ($chars === INF || !$chars) {
9544 175
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
9545
    } else {
9546 37
      $chars = \preg_quote($chars, '/');
9547 37
      $pattern = "^[$chars]+|[$chars]+\$";
9548
    }
9549
9550 192
    return self::regex_replace($str, $pattern, '', '', '/');
9551
  }
9552
9553
  /**
9554
   * Makes string's first char uppercase.
9555
   *
9556
   * @param string $str       <p>The input string.</p>
9557
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9558
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9559
   *
9560
   * @return string The resulting string.
9561
   */
9562 75
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
9563
  {
9564 75
    if ($cleanUtf8 === true) {
9565
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
9566
      // if invalid characters are found in $haystack before $needle
9567 1
      $str = self::clean($str);
9568
    }
9569
9570 75
    $strPartTwo = self::substr($str, 1, null, $encoding);
9571 75
    if ($strPartTwo === false) {
9572
      $strPartTwo = '';
9573
    }
9574
9575 75
    $strPartOne = self::strtoupper(
9576 75
        (string)self::substr($str, 0, 1, $encoding),
9577 75
        $encoding,
9578 75
        $cleanUtf8
9579
    );
9580
9581 75
    return $strPartOne . $strPartTwo;
9582
  }
9583
9584
  /**
9585
   * alias for "UTF8::ucfirst()"
9586
   *
9587
   * @see UTF8::ucfirst()
9588
   *
9589
   * @param string $str
9590
   * @param string $encoding
9591
   * @param bool   $cleanUtf8
9592
   *
9593
   * @return string
9594
   */
9595 1
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
9596
  {
9597 1
    return self::ucfirst($str, $encoding, $cleanUtf8);
9598
  }
9599
9600
  /**
9601
   * Uppercase for all words in the string.
9602
   *
9603
   * @param string   $str        <p>The input string.</p>
9604
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
9605
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9606
   * @param string   $encoding   [optional] <p>Set the charset.</p>
9607
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
9608
   *
9609
   * @return string
9610
   */
9611 8
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
9612
  {
9613 8
    if (!$str) {
9614 2
      return '';
9615
    }
9616
9617
    // INFO: mb_convert_case($str, MB_CASE_TITLE);
9618
    // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters
9619
9620 7
    if ($cleanUtf8 === true) {
9621
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
9622
      // if invalid characters are found in $haystack before $needle
9623 1
      $str = self::clean($str);
9624
    }
9625
9626 7
    $usePhpDefaultFunctions = !(bool)($charlist . \implode('', $exceptions));
9627
9628
    if (
9629 7
        $usePhpDefaultFunctions === true
9630
        &&
9631 7
        self::is_ascii($str) === true
9632
    ) {
9633
      return \ucwords($str);
9634
    }
9635
9636 7
    $words = self::str_to_words($str, $charlist);
9637 7
    $newWords = [];
9638
9639 7
    if (\count($exceptions) > 0) {
9640 1
      $useExceptions = true;
9641
    } else {
9642 7
      $useExceptions = false;
9643
    }
9644
9645 7
    foreach ($words as $word) {
9646
9647 7
      if (!$word) {
9648 7
        continue;
9649
      }
9650
9651
      if (
9652 7
          $useExceptions === false
9653
          ||
9654
          (
9655 1
              $useExceptions === true
9656
              &&
9657 7
              !\in_array($word, $exceptions, true)
9658
          )
9659
      ) {
9660 7
        $word = self::ucfirst($word, $encoding);
9661
      }
9662
9663 7
      $newWords[] = $word;
9664
    }
9665
9666 7
    return \implode('', $newWords);
9667
  }
9668
9669
  /**
9670
   * Multi decode html entity & fix urlencoded-win1252-chars.
9671
   *
9672
   * e.g:
9673
   * 'test+test'                     => 'test test'
9674
   * 'D&#252;sseldorf'               => 'Düsseldorf'
9675
   * 'D%FCsseldorf'                  => 'Düsseldorf'
9676
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
9677
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
9678
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
9679
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
9680
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
9681
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
9682
   *
9683
   * @param string $str          <p>The input string.</p>
9684
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
9685
   *
9686
   * @return string
9687
   */
9688 1
  public static function urldecode(string $str, bool $multi_decode = true): string
9689
  {
9690 1
    if ('' === $str) {
9691 1
      return '';
9692
    }
9693
9694 1
    $pattern = '/%u([0-9a-f]{3,4})/i';
9695 1
    if (\preg_match($pattern, $str)) {
9696 1
      $str = (string)\preg_replace($pattern, '&#x\\1;', \urldecode($str));
9697
    }
9698
9699 1
    $flags = ENT_QUOTES | ENT_HTML5;
9700
9701
    do {
9702 1
      $str_compare = $str;
9703
9704 1
      $str = self::fix_simple_utf8(
9705 1
          \urldecode(
9706 1
              self::html_entity_decode(
9707 1
                  self::to_utf8($str),
9708 1
                  $flags
9709
              )
9710
          )
9711
      );
9712
9713 1
    } while ($multi_decode === true && $str_compare !== $str);
9714
9715 1
    return $str;
9716
  }
9717
9718
  /**
9719
   * Returns an array with "urlencoded"-win1252 -> UTF-8
9720
   *
9721
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
9722
   *
9723
   * @return string[]
9724
   */
9725 1
  public static function urldecode_fix_win1252_chars(): array
9726
  {
9727
    return [
9728 1
        '%20' => ' ',
9729
        '%21' => '!',
9730
        '%22' => '"',
9731
        '%23' => '#',
9732
        '%24' => '$',
9733
        '%25' => '%',
9734
        '%26' => '&',
9735
        '%27' => "'",
9736
        '%28' => '(',
9737
        '%29' => ')',
9738
        '%2A' => '*',
9739
        '%2B' => '+',
9740
        '%2C' => ',',
9741
        '%2D' => '-',
9742
        '%2E' => '.',
9743
        '%2F' => '/',
9744
        '%30' => '0',
9745
        '%31' => '1',
9746
        '%32' => '2',
9747
        '%33' => '3',
9748
        '%34' => '4',
9749
        '%35' => '5',
9750
        '%36' => '6',
9751
        '%37' => '7',
9752
        '%38' => '8',
9753
        '%39' => '9',
9754
        '%3A' => ':',
9755
        '%3B' => ';',
9756
        '%3C' => '<',
9757
        '%3D' => '=',
9758
        '%3E' => '>',
9759
        '%3F' => '?',
9760
        '%40' => '@',
9761
        '%41' => 'A',
9762
        '%42' => 'B',
9763
        '%43' => 'C',
9764
        '%44' => 'D',
9765
        '%45' => 'E',
9766
        '%46' => 'F',
9767
        '%47' => 'G',
9768
        '%48' => 'H',
9769
        '%49' => 'I',
9770
        '%4A' => 'J',
9771
        '%4B' => 'K',
9772
        '%4C' => 'L',
9773
        '%4D' => 'M',
9774
        '%4E' => 'N',
9775
        '%4F' => 'O',
9776
        '%50' => 'P',
9777
        '%51' => 'Q',
9778
        '%52' => 'R',
9779
        '%53' => 'S',
9780
        '%54' => 'T',
9781
        '%55' => 'U',
9782
        '%56' => 'V',
9783
        '%57' => 'W',
9784
        '%58' => 'X',
9785
        '%59' => 'Y',
9786
        '%5A' => 'Z',
9787
        '%5B' => '[',
9788
        '%5C' => '\\',
9789
        '%5D' => ']',
9790
        '%5E' => '^',
9791
        '%5F' => '_',
9792
        '%60' => '`',
9793
        '%61' => 'a',
9794
        '%62' => 'b',
9795
        '%63' => 'c',
9796
        '%64' => 'd',
9797
        '%65' => 'e',
9798
        '%66' => 'f',
9799
        '%67' => 'g',
9800
        '%68' => 'h',
9801
        '%69' => 'i',
9802
        '%6A' => 'j',
9803
        '%6B' => 'k',
9804
        '%6C' => 'l',
9805
        '%6D' => 'm',
9806
        '%6E' => 'n',
9807
        '%6F' => 'o',
9808
        '%70' => 'p',
9809
        '%71' => 'q',
9810
        '%72' => 'r',
9811
        '%73' => 's',
9812
        '%74' => 't',
9813
        '%75' => 'u',
9814
        '%76' => 'v',
9815
        '%77' => 'w',
9816
        '%78' => 'x',
9817
        '%79' => 'y',
9818
        '%7A' => 'z',
9819
        '%7B' => '{',
9820
        '%7C' => '|',
9821
        '%7D' => '}',
9822
        '%7E' => '~',
9823
        '%7F' => '',
9824
        '%80' => '`',
9825
        '%81' => '',
9826
        '%82' => '‚',
9827
        '%83' => 'ƒ',
9828
        '%84' => '„',
9829
        '%85' => '…',
9830
        '%86' => '†',
9831
        '%87' => '‡',
9832
        '%88' => 'ˆ',
9833
        '%89' => '‰',
9834
        '%8A' => 'Š',
9835
        '%8B' => '‹',
9836
        '%8C' => 'Œ',
9837
        '%8D' => '',
9838
        '%8E' => 'Ž',
9839
        '%8F' => '',
9840
        '%90' => '',
9841
        '%91' => '‘',
9842
        '%92' => '’',
9843
        '%93' => '“',
9844
        '%94' => '”',
9845
        '%95' => '•',
9846
        '%96' => '–',
9847
        '%97' => '—',
9848
        '%98' => '˜',
9849
        '%99' => '™',
9850
        '%9A' => 'š',
9851
        '%9B' => '›',
9852
        '%9C' => 'œ',
9853
        '%9D' => '',
9854
        '%9E' => 'ž',
9855
        '%9F' => 'Ÿ',
9856
        '%A0' => '',
9857
        '%A1' => '¡',
9858
        '%A2' => '¢',
9859
        '%A3' => '£',
9860
        '%A4' => '¤',
9861
        '%A5' => '¥',
9862
        '%A6' => '¦',
9863
        '%A7' => '§',
9864
        '%A8' => '¨',
9865
        '%A9' => '©',
9866
        '%AA' => 'ª',
9867
        '%AB' => '«',
9868
        '%AC' => '¬',
9869
        '%AD' => '',
9870
        '%AE' => '®',
9871
        '%AF' => '¯',
9872
        '%B0' => '°',
9873
        '%B1' => '±',
9874
        '%B2' => '²',
9875
        '%B3' => '³',
9876
        '%B4' => '´',
9877
        '%B5' => 'µ',
9878
        '%B6' => '¶',
9879
        '%B7' => '·',
9880
        '%B8' => '¸',
9881
        '%B9' => '¹',
9882
        '%BA' => 'º',
9883
        '%BB' => '»',
9884
        '%BC' => '¼',
9885
        '%BD' => '½',
9886
        '%BE' => '¾',
9887
        '%BF' => '¿',
9888
        '%C0' => 'À',
9889
        '%C1' => 'Á',
9890
        '%C2' => 'Â',
9891
        '%C3' => 'Ã',
9892
        '%C4' => 'Ä',
9893
        '%C5' => 'Å',
9894
        '%C6' => 'Æ',
9895
        '%C7' => 'Ç',
9896
        '%C8' => 'È',
9897
        '%C9' => 'É',
9898
        '%CA' => 'Ê',
9899
        '%CB' => 'Ë',
9900
        '%CC' => 'Ì',
9901
        '%CD' => 'Í',
9902
        '%CE' => 'Î',
9903
        '%CF' => 'Ï',
9904
        '%D0' => 'Ð',
9905
        '%D1' => 'Ñ',
9906
        '%D2' => 'Ò',
9907
        '%D3' => 'Ó',
9908
        '%D4' => 'Ô',
9909
        '%D5' => 'Õ',
9910
        '%D6' => 'Ö',
9911
        '%D7' => '×',
9912
        '%D8' => 'Ø',
9913
        '%D9' => 'Ù',
9914
        '%DA' => 'Ú',
9915
        '%DB' => 'Û',
9916
        '%DC' => 'Ü',
9917
        '%DD' => 'Ý',
9918
        '%DE' => 'Þ',
9919
        '%DF' => 'ß',
9920
        '%E0' => 'à',
9921
        '%E1' => 'á',
9922
        '%E2' => 'â',
9923
        '%E3' => 'ã',
9924
        '%E4' => 'ä',
9925
        '%E5' => 'å',
9926
        '%E6' => 'æ',
9927
        '%E7' => 'ç',
9928
        '%E8' => 'è',
9929
        '%E9' => 'é',
9930
        '%EA' => 'ê',
9931
        '%EB' => 'ë',
9932
        '%EC' => 'ì',
9933
        '%ED' => 'í',
9934
        '%EE' => 'î',
9935
        '%EF' => 'ï',
9936
        '%F0' => 'ð',
9937
        '%F1' => 'ñ',
9938
        '%F2' => 'ò',
9939
        '%F3' => 'ó',
9940
        '%F4' => 'ô',
9941
        '%F5' => 'õ',
9942
        '%F6' => 'ö',
9943
        '%F7' => '÷',
9944
        '%F8' => 'ø',
9945
        '%F9' => 'ù',
9946
        '%FA' => 'ú',
9947
        '%FB' => 'û',
9948
        '%FC' => 'ü',
9949
        '%FD' => 'ý',
9950
        '%FE' => 'þ',
9951
        '%FF' => 'ÿ',
9952
    ];
9953
  }
9954
9955
  /**
9956
   * Decodes an UTF-8 string to ISO-8859-1.
9957
   *
9958
   * @param string $str <p>The input string.</p>
9959
   * @param bool   $keepUtf8Chars
9960
   *
9961
   * @return string
9962
   */
9963 6
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
9964
  {
9965 6
    if ('' === $str) {
9966 3
      return '';
9967
    }
9968
9969 6
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
9970 6
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
9971
9972 6
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
9973
9974 1
      if (self::$WIN1252_TO_UTF8 === null) {
9975
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9976
      }
9977
9978 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9978
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
9979 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9979
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
9980
    }
9981
9982
    /** @noinspection PhpInternalEntityUsedInspection */
9983 6
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
9984
9985 6
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9986
      self::checkForSupport();
9987
    }
9988
9989
    // save for later comparison
9990 6
    $str_backup = $str;
9991 6
    $len = self::strlen_in_byte($str);
9992
9993 6
    if (self::$ORD === null) {
9994
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9995
    }
9996
9997 6
    if (self::$CHR === null) {
9998
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9999
    }
10000
10001 6
    $noCharFound = '?';
10002
    /** @noinspection ForeachInvariantsInspection */
10003 6
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10004 6
      switch ($str[$i] & "\xF0") {
10005 6
        case "\xC0":
10006 6
        case "\xD0":
10007 6
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10008 6
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10009 6
          break;
10010
10011
        /** @noinspection PhpMissingBreakStatementInspection */
10012 6
        case "\xF0":
10013
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10014 6
        case "\xE0":
10015 5
          $str[$j] = $noCharFound;
10016 5
          $i += 2;
10017 5
          break;
10018
10019
        default:
10020 6
          $str[$j] = $str[$i];
10021
      }
10022
    }
10023
10024 6
    $return = self::substr_in_byte($str, 0, $j);
10025 6
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10026
      $return = '';
10027
    }
10028
10029
    if (
10030 6
        $keepUtf8Chars === true
10031
        &&
10032 6
        self::strlen($return) >= self::strlen($str_backup)
10033
    ) {
10034 1
      return $str_backup;
10035
    }
10036
10037 6
    return $return;
10038
  }
10039
10040
  /**
10041
   * Encodes an ISO-8859-1 string to UTF-8.
10042
   *
10043
   * @param string $str <p>The input string.</p>
10044
   *
10045
   * @return string
10046
   */
10047 7
  public static function utf8_encode(string $str): string
10048
  {
10049 7
    if ('' === $str) {
10050 7
      return '';
10051
    }
10052
10053 7
    $str = \utf8_encode($str);
10054
10055
    // the polyfill may return false
10056
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
10057 7
    if ($str === false) {
10058
      return '';
10059
    }
10060
10061 7
    if (false === \strpos($str, "\xC2")) {
10062 3
      return $str;
10063
    }
10064
10065 6
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
10066 6
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;
10067
10068 6
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {
10069
10070 1
      if (self::$WIN1252_TO_UTF8 === null) {
10071
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10072
      }
10073
10074 1
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10074
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10075 1
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10075
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10076
    }
10077
10078 6
    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
10079
  }
10080
10081
  /**
10082
   * fix -> utf8-win1252 chars
10083
   *
10084
   * @param string $str <p>The input string.</p>
10085
   *
10086
   * @return string
10087
   *
10088
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
10089
   */
10090 1
  public static function utf8_fix_win1252_chars(string $str): string
10091
  {
10092 1
    return self::fix_simple_utf8($str);
10093
  }
10094
10095
  /**
10096
   * Returns an array with all utf8 whitespace characters.
10097
   *
10098
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
10099
   *
10100
   * @author: Derek E. [email protected]
10101
   *
10102
   * @return string[]
10103
   *                 An array with all known whitespace characters as values and the type of whitespace as keys
10104
   *                 as defined in above URL.
10105
   */
10106 1
  public static function whitespace_table(): array
10107
  {
10108 1
    return self::$WHITESPACE_TABLE;
10109
  }
10110
10111
  /**
10112
   * Limit the number of words in a string.
10113
   *
10114
   * @param string $str      <p>The input string.</p>
10115
   * @param int    $limit    <p>The limit of words as integer.</p>
10116
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
10117
   *
10118
   * @return string
10119
   */
10120 1
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
10121
  {
10122 1
    if ('' === $str) {
10123 1
      return '';
10124
    }
10125
10126 1
    if ($limit < 1) {
10127 1
      return '';
10128
    }
10129
10130 1
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);
10131
10132
    if (
10133 1
        !isset($matches[0])
10134
        ||
10135 1
        self::strlen($str) === self::strlen($matches[0])
10136
    ) {
10137 1
      return $str;
10138
    }
10139
10140 1
    return self::rtrim($matches[0]) . $strAddOn;
10141
  }
10142
10143
  /**
10144
   * Wraps a string to a given number of characters
10145
   *
10146
   * @link  http://php.net/manual/en/function.wordwrap.php
10147
   *
10148
   * @param string $str   <p>The input string.</p>
10149
   * @param int    $width [optional] <p>The column width.</p>
10150
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
10151
   * @param bool   $cut   [optional] <p>
10152
   *                      If the cut is set to true, the string is
10153
   *                      always wrapped at or before the specified width. So if you have
10154
   *                      a word that is larger than the given width, it is broken apart.
10155
   *                      </p>
10156
   *
10157
   * @return string The given string wrapped at the specified column.
10158
   */
10159 10
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
10160
  {
10161 10
    if ('' === $str || '' === $break) {
10162 3
      return '';
10163
    }
10164
10165 8
    $w = '';
10166 8
    $strSplit = \explode($break, $str);
10167 8
    if ($strSplit === false) {
10168
      $count = 0;
10169
    } else {
10170 8
      $count = \count($strSplit);
10171
    }
10172
10173 8
    $chars = [];
10174
    /** @noinspection ForeachInvariantsInspection */
10175 8
    for ($i = 0; $i < $count; ++$i) {
10176
10177 8
      if ($i) {
10178 1
        $chars[] = $break;
10179 1
        $w .= '#';
10180
      }
10181
10182 8
      $c = $strSplit[$i];
10183 8
      unset($strSplit[$i]);
10184
10185 8
      if ($c !== null) {
10186 8
        foreach (self::split($c) as $c) {
10187 8
          $chars[] = $c;
10188 8
          $w .= ' ' === $c ? ' ' : '?';
10189
        }
10190
      }
10191
    }
10192
10193 8
    $strReturn = '';
10194 8
    $j = 0;
10195 8
    $b = $i = -1;
10196 8
    $w = \wordwrap($w, $width, '#', $cut);
10197
10198 8
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
10199 6
      for (++$i; $i < $b; ++$i) {
10200 6
        $strReturn .= $chars[$j];
10201 6
        unset($chars[$j++]);
10202
      }
10203
10204 6
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
10205 3
        unset($chars[$j++]);
10206
      }
10207
10208 6
      $strReturn .= $break;
10209
    }
10210
10211 8
    return $strReturn . \implode('', $chars);
10212
  }
10213
10214
  /**
10215
   * Line-Wrap the string after $limit, but also after the next word.
10216
   *
10217
   * @param string $str
10218
   * @param int    $limit
10219
   *
10220
   * @return string
10221
   */
10222 1
  public static function wordwrap_per_line(string $str, int $limit): string
10223
  {
10224 1
    $strings = (array)\preg_split('/\\r\\n|\\r|\\n/', $str);
10225
10226 1
    $string = '';
10227 1
    foreach ($strings as $value) {
10228 1
      if ($value === false) {
10229
        continue;
10230
      }
10231
10232 1
      $string .= wordwrap($value, $limit);
10233 1
      $string .= "\n";
10234
    }
10235
10236 1
    return $string;
10237
  }
10238
10239
  /**
10240
   * Returns an array of Unicode White Space characters.
10241
   *
10242
   * @return string[] An array with numeric code point as key and White Space Character as value.
10243
   */
10244 1
  public static function ws(): array
10245
  {
10246 1
    return self::$WHITESPACE;
10247
  }
10248
10249
}
10250