Passed
Push — master ( d736a2...201efc )
by Lars
04:07
created

UTF8::is_hexadecimal()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a workaround for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Bom => Byte-Length
   *
   * Every BOM is listed twice: once as raw bytes and once as the UTF-8 text
   * obtained when those bytes were mis-read as "WINDOWS-1252". The value is
   * the length of the key in bytes.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // Written with byte escapes (U+00EF U+00BB U+00BF encoded as UTF-8) so the key cannot be
      // lost or altered by editors / tooling that strip or re-encode these characters.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Creates a new instance.
   *
   * Instantiation runs the environment feature detection,
   * see UTF8::checkForSupport().
   */
  public function __construct()
  {
    // the detection itself is a no-op once it has run
    self::checkForSupport();
  }
233
234
  /**
   * Array-like character access for UTF-8 strings ($str[1] like functionality).
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the character to return.</p>
   *
   * @return string The single (multi-byte) character, or an empty string for an
   *                empty input or a negative position.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string or in front of its first character
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Returns the string with a leading UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false
   *       is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The output string that contains BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The work is delegated to UTF8::str_pad(); the padding type is
   * STR_PAD_LEFT when only $left is set, STR_PAD_RIGHT when only $right is
   * set and STR_PAD_BOTH otherwise.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has the documented default: a required parameter after
    // optional ones is deprecated and makes the earlier defaults unusable.
    $strlen = self::strlen($str, $encoding);

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    // the unused side is 0, so one sum covers every case
    $length = $left + $right + $strlen;

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Changes the case of all keys in an array.
   *
   * The keys are converted via UTF8::strtolower() / UTF8::strtoupper(), the
   * values are copied as they are. If two keys become equal after the
   * conversion, the later entry overwrites the earlier one.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); every other
   *                     value is handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array The original values with lower or uppercased keys.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything except an explicit CASE_UPPER means "lowercase"
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $name => $item) {
      $name = $toUpper ? self::strtoupper($name) : self::strtolower($name);
      $result[$name] = $item;
    }

    return $result;
  }
338
339
  /**
   * Returns the text enclosed by the first $start delimiter (searched from
   * $offset) and the next $end delimiter after it.
   *
   * An empty string is returned when one of the delimiters is not found or
   * when nothing lies between them.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if (false === $startPos) {
      return '';
    }

    // first character behind the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if (false === $endPos || $endPos === $from) {
      return '';
    }

    $found = self::substr($str, $from, $endPos - $from, $encoding);

    return $found === false ? '' : $found;
  }
377
378
  /**
   * Converts a string of binary digits ("0" / "1") into the bytes it describes.
   *
   * The input is read as one big-endian binary number: leading zero bits are
   * dropped, the remaining bits are left-padded to whole bytes and every group
   * of 8 bits becomes one byte of the result. Characters other than "0" and "1"
   * are ignored.
   *
   * The conversion is done byte by byte instead of via base_convert(), which
   * loses precision on long inputs and drops leading zero nibbles
   * (e.g. "00001010" used to become "\xa0" instead of "\x0a").
   *
   * @param mixed $bin 1|0
   *
   * @return string The decoded bytes; an empty string for non-string input, for
   *                an empty string and for input without any 1-bit.
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // keep only binary digits and drop the insignificant leading zeros
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 bits so that the value of the number is kept
    $missingBits = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missingBits) . $bits;

    $bytes = '';
    foreach (\str_split($bits, 8) as $octet) {
      $bytes .= \chr((int)\bindec($octet));
    }

    return $bytes;
  }
398
399
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
410
411
  /**
   * Alias of UTF8::chr_map(): both arguments are passed on unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * The character is fetched as a substring of length 1 via UTF8::substr(),
   * whose result is cast to string.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
   * Splits the string into its characters (UTF8::str_split() with a length of 1).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    $chars = self::str_split($str, 1);

    return $chars;
  }
451
452
  /**
   * Detects once which UTF-8 related extensions / features are available and
   * stores the result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection is only done once
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // stays empty unless "intl" offers transliterator_list_ids(), see below
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Code points up to 127 are read from the lookup table (self::$CHR), larger
   * ones are created via IntlChar::chr() when available, otherwise the UTF-8
   * byte sequence is assembled from the lookup table. For every encoding other
   * than UTF-8 the character is passed through UTF8::encode(). Results are
   * memoized per code point and encoding.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input
   *                     (e.g. a negative code point or one above U+10FFFF).
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    $isSimpleChar = $code_point <= 127; // use "simple"-char only until "\x80"
    $useIntlChar = !$isSimpleChar && self::$SUPPORT['intlChar'] === true;

    // the lookup table is needed by every path except the IntlChar one
    if (!$useIntlChar && self::$CHR === null) {
      $chrTmp = self::getData('chr');
      if ($chrTmp) {
        self::$CHR = (array)$chrTmp;
      }
    }

    if ($isSimpleChar) {

      // e.g. negative code points have no entry in the table
      if (!isset(self::$CHR[$code_point])) {
        return null;
      }

      $chr = self::$CHR[$code_point];

    } elseif ($useIntlChar) {

      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);

      // IntlChar rejects invalid code points with null, which must not be
      // handed over to the strictly typed UTF8::encode()
      if ($chr === null) {
        return null;
      }

    } else {

      $code_point = (int)$code_point;

      // there is no valid UTF-8 sequence above U+10FFFF
      if ($code_point > 0x10FFFF) {
        return null;
      }

      if ($code_point <= 0x7F) {
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
601
602
  /**
   * Applies callback to all characters of a string.
   *
   * The string is split via UTF8::split() and the pieces are handed to array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character, an empty array
   *               for an empty string.
   */
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // with "mbstring.func_overload" the native strlen() is not used,
    // UTF8::strlen_in_byte() is called instead
    $byteLength = '\strlen';
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteLength = function ($data) {
        return UTF8::strlen_in_byte($data);
      };
    }

    return \array_map($byteLength, $chars);
  }
652
653
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte determines how many bytes belong to the character; the
   * payload bits of the lead byte and of the following bytes are then
   * combined into the code point. Only the first character of $char is used.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int The code point, 0 for an empty string.
   */
  public static function chr_to_decimal(string $char): int
  {
    // there is no first byte to inspect in an empty string
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // a truncated sequence must not read beyond the end of the string;
      // NOTE(review): the bits collected so far are returned in that case
      if (!isset($char[$i])) {
        break;
      }

      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$i]) & ~0x80);
    }

    return $code;
  }
691
692
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The code point is determined via UTF8::ord() and formatted via
   * UTF8::int_to_hex().
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx, an empty string for an empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the entity "&#0;" is handed to UTF8::ord() as an empty string
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
   * Alias of UTF8::chr_to_decimal(): the argument is passed on unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * The chunks are created via UTF8::split() and glued together with
   * implode(), i.e. $end is placed between the chunks and not behind the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The byte filter always runs first, the optional steps follow in this order:
   * diamond question mark, invisible characters, whitespace, MS Word chars, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 collects runs of well-formed sequences, the other groups
    // catch single stray bytes
    $utf8Filter = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // only group 1 is written back, so the stray bytes are dropped
    $cleaned = (string)\preg_replace($utf8Filter, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * The input is cast to string, repaired via UTF8::fix_simple_utf8() and then
   * handed to UTF8::clean() with a fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $text = (string)$str;
    if ($text === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $text = self::fix_simple_utf8($text);

    // remove all none UTF-8 symbols
    // && remove diamond question mark (�)
    // && remove invisible characters (e.g. "\0")
    // && remove BOM
    // && normalize whitespace chars (but keep non-breaking-spaces)
    return self::clean($text, true, true, false, true, true, true);
  }
846
847
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is split via UTF8::split() first; every element is then mapped
   * through UTF8::ord() and, for $u_style, additionally through UTF8::int_to_hex().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $chars);

    // nothing to format for an empty list or for plain integer output
    if (\count($points) === 0 || !$u_style) {
      return $points;
    }

    return \array_map([self::class, 'int_to_hex'], $points);
  }
891
892
  /**
   * Trims the string and replaces consecutive whitespace characters with a
   * single space. This includes tabs and newline characters, as well as
   * multibyte whitespace such as the thin space and ideographic space.
   *
   * The runs are matched with the "[[:space:]]+" pattern via UTF8::regex_replace(),
   * the result is trimmed via UTF8::trim().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
907
908
  /**
   * Returns count of characters used in a string.
   *
   * The string is split into single characters via UTF8::split(), the
   * occurrences are counted with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
921
922
  /**
   * Remove css media-queries.
   *
   * Every "@media ... { ... }" block matched by the pattern is replaced with
   * an empty string; a failing preg_replace() results in an empty string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function css_stripe_media_queries(string $str): string
  {
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
937
938
  /**
   * Checks whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts a int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") which is then
   * decoded via UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
   * Decodes a MIME header field
   *
   * With "iconv" support the work is done by iconv_mime_decode() (which
   * continues on errors); otherwise the string is passed through
   * UTF8::encode() for non UTF-8 encodings and decoded via mb_decode_mimeheader().
   *
   * @param string $str
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    // strict comparison, consistent with the encoding check at the top
    if ($encoding !== 'UTF-8') {
      $str = self::encode($encoding, $str);
    }

    return \mb_decode_mimeheader($str);
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 string and you don't mess up the string.
   *
   * Processing order:
   * - an empty string or an empty target encoding returns the input unchanged
   * - "BASE64" and "HTML-ENTITIES" are handled as pseudo-encodings (as source and as target)
   * - UTF-8 / ISO-8859-1 / WINDOWS-1252 combinations are delegated to "to_utf8()" / "to_iso8859()"
   * - everything else goes through "mb_convert_encoding()" and, as last resort, "iconv()"
   *
   * @param string $toEncoding                  <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str                         <p>The input string</p>
   * @param bool   $autodetectFromEncoding      [optional] <p><strong>true</strong>: always detect the encoding of
   *                                            the input, the detected value replaces $fromEncoding and the
   *                                            string is passed through "to_utf8()" if nothing could be
   *                                            detected.<br><strong>false</strong>: only detect the encoding
   *                                            if $fromEncoding is empty.</p>
   * @param string $fromEncoding                [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            A empty string will trigger the autodetect anyway.</p>
   *
   * @return string
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // Nothing to do if source and target are known to be identical.
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    // One support-check is enough for the whole method.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // pseudo-encoding: BASE64
    if ($toEncoding === 'BASE64') {
      return \base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
      // The cast keeps $str a string even if the decoder reports a failure.
      $str = (string)\base64_decode($str);
      $fromEncoding = '';
    }

    // pseudo-encoding: HTML-ENTITIES
    if ($toEncoding === 'HTML-ENTITIES') {
      return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = '';
    }

    $fromEncodingDetected = false;
    if (
        $autodetectFromEncoding === true
        ||
        !$fromEncoding
    ) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

    if ($fromEncodingDetected !== false) {
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (
        !$fromEncoding
        ||
        $fromEncoding === $toEncoding
    ) {
      return $str;
    }

    if (
        $toEncoding === 'UTF-8'
        &&
        (
            $fromEncoding === 'WINDOWS-1252'
            ||
            $fromEncoding === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        (
            $fromEncoding === 'WINDOWS-1252'
            ||
            $fromEncoding === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      //
      // The source encoding must be the (detected or given) $fromEncoding; using the target
      // encoding as source would re-interpret the bytes instead of converting them.
      $strEncoded = \mb_convert_encoding(
          $str,
          $toEncoding,
          $fromEncoding
      );

      // Explicit check, so that a valid result like "0" is not thrown away as "falsy".
      if (\is_string($strEncoded) && $strEncoded !== '') {
        return $strEncoded;
      }
    }

    // Last resort: iconv, but only if the function exists at all.
    if (\function_exists('iconv')) {
      $return = \iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
      if ($return !== false) {
        return $return;
      }
    }

    return $str;
  }
1133
1134
  /**
   * Encodes a string as MIME header field value via "iconv_mime_encode()" (with an empty field name).
   *
   * @param string $str              <p>The value to encode.</p>
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding, passed as "scheme".</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed, passed as "line-break-chars".</p>
   * @param int    $indent           [optional] <p>Set the max length indent, passed as "line-length".</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // "UTF-8" and "CP850" are used as-is, every other charset name gets normalized first.
    if (!($fromCharset === 'UTF-8' || $fromCharset === 'CP850')) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if (!($toCharset === 'UTF-8' || $toCharset === 'CP850')) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1177
1178
  /**
   * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
   *
   * Without a search-string the extract starts at the beginning of the text and is cut
   * at a space / dot near $length.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // Measure once, always with the requested encoding; the cast guards against a "false" result,
    // which must never reach the int-typed offset / length parameters below.
    $strLength = (int)self::strlen($str, $encoding);

    if ($length === null) {
      $length = (int)\round($strLength / 2, 0);
    }

    // Strict comparison: empty() would also treat the search-string "0" as "no search".
    if ('' === $search) {

      if ($length > 0) {
        $end = ($length - 1) > $strLength ? $strLength : ($length - 1);
      } else {
        $end = 0;
      }

      // NOTE(review): if one of the two delimiters is not found, \min() yields false and the cast
      // makes it 0, so the whole string is returned below. Kept as-is, confirm that this is intended.
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        // Start the extract at the last space / dot before the centered window.
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    // A hit at position 0 is deliberately handled like "not found": the extract then simply
    // starts at the beginning of the text (see the else-branch).
    $foundAfterStart = ($wordPos !== false && $wordPos > 0);

    if ($foundAfterStart && $halfSide > 0) {
      $l = $pos_start + $length - 1;

      if ($l > $strLength) {
        $l = $strLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // No usable end position: take everything from the start position up to the end of the text.
        $strSub = self::substr($str, $pos_start, $strLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      $l = $length - 1;

      if ($l > $strLength) {
        $l = $strLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1299
1300
  /**
   * Reads entire file into a string and (optionally) converts the content to UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename             <p>
   *                                            Name of the file to read.
   *                                            </p>
   * @param bool          $use_include_path     [optional] <p>
   *                                            Passed through to the native "file_get_contents()",
   *                                            it triggers the include path search.
   *                                            </p>
   * @param resource|null $context              [optional] <p>
   *                                            A valid context resource created with
   *                                            stream_context_create. If it is null and $timeout is set,
   *                                            a context with the "http"-timeout is created.
   *                                            </p>
   * @param int|null      $offset               [optional] <p>
   *                                            The offset where the reading starts, null is handled as 0.
   *                                            </p>
   * @param int|null      $maxLength            [optional] <p>
   *                                            Maximum length of data read. The default is to read until end
   *                                            of file is reached.
   *                                            </p>
   * @param int           $timeout              <p>The time in seconds for the timeout.</p>
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
   *                                            files, because they used non default utf-8 chars. Binary files like
   *                                            images or pdf will not be converted.</p>
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            A empty string will trigger the autodetect anyway.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1, consider a replacement.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a default context (with the http-timeout) if the caller did not bring one.
    if ($timeout && $context === null) {
      $httpOptions = ['timeout' => $timeout];
      $context = \stream_context_create(['http' => $httpOptions]);
    }

    $offset = $offset ?? 0;

    // The length argument must be left out entirely if no limit was requested.
    if ($maxLength === null) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    $isConvertible = self::is_binary($data, true) !== true
                     ||
                     self::is_utf16($data) !== false
                     ||
                     self::is_utf32($data) !== false;

    if ($isConvertible) {
      $data = self::encode('UTF-8', $data, false, $fromEncoding);
      $data = self::cleanup($data);
    }

    return $data;
  }
1395
1396
  /**
   * Checks if the content of a file starts with a BOM (Byte Order Mark).
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    // This is the native PHP function, not the wrapper from this class.
    $rawContent = \file_get_contents($file_path);

    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1415
1416
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized and every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form passed to the "Normalizer" class.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark,
   *                                   an empty string disables the prefix.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (\is_object($var)) {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string (int, bool, null, ...) passes through unchanged.
    if (!\is_string($var)) {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Only non-ASCII strings need the normalization below.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder, so that the "isset($n[0])"-check below succeeds for already normalized input.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      if (isset($n[0])) {
        $var = $n;
      } else {
        // The normalizer gave us nothing usable, so fall back to the encoding-fixer.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1479
1480
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only if the caller really passed a fourth argument.
    $optionsWerePassed = \func_num_args() >= 4;

    $var = $optionsWerePassed
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($var);
  }
1518
1519
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are "filter" which specifies the filter type,
   *                          "flags" which specifies any flags that apply to the
   *                          filter, and "options" which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
   *               set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // Forward $definition / $add_empty only if the caller passed more than the input-type.
    $definitionWasPassed = \func_num_args() >= 2;

    $a = $definitionWasPassed
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($a);
  }
1564
1565
  /**
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Filters a variable with a specified filter
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>
   *                        Value to filter.
   *                        </p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filter that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // for filter that only accept flags, you can also pass as an array
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
   *                        // callback filter, "foo" receives the value and returns the filtered value
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only if the caller really passed a third argument.
    $optionsWerePassed = \func_num_args() >= 3;

    $filtered = $optionsWerePassed
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1633
1634
  /**
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets multiple variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter, flags and options.
   *                          If the value is an array, valid keys are "filter"
   *                          which specifies the filter type,
   *                          "flags" which specifies any flags that apply to the
   *                          filter, and "options" which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // Forward $definition / $add_empty only if the caller passed more than the data-array.
    $definitionWasPassed = \func_num_args() >= 2;

    $a = $definitionWasPassed
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    return self::filter($a);
  }
1676
1677
  /**
   * Tells whether the "finfo" class exists.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function finfo_loaded(): bool
  {
    $finfoExists = \class_exists('finfo');

    return $finfoExists;
  }
1687
1688
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the start,
   *                         values <= 0 result in an empty string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      $leadingChars = self::substr($str, 0, $n, $encoding);

      if ($leadingChars !== false) {
        return $leadingChars;
      }
    }

    // Either nothing was requested or the substring could not be extracted.
    return '';
  }
1710
1711
  /**
   * Check that the string does not contain more unicode characters than the given limit.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The maximum number of chars the string may have.
   *
   * @return bool true if the string length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1723
1724
  /**
   * Replace case-fold character pairs in the string, first via the common
   * case-fold table and optionally via the full case-fold data set.
   *
   * @param string $str
   * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
   * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
   *
   * @return string
   */
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    // lazily loaded and kept for the lifetime of the request
    static $FULL_CASE_FOLD = null;

    $toLower = ($useLower === true);

    $commonUpper = self::$COMMON_CASE_FOLD['upper'];
    $commonLower = self::$COMMON_CASE_FOLD['lower'];

    $str = (string)\str_replace(
        $toLower ? $commonUpper : $commonLower,
        $toLower ? $commonLower : $commonUpper,
        $str
    );

    if (!$fullCaseFold) {
      return $str;
    }

    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // index 0 is replaced by index 1 when lower-casing, the other way round otherwise
    $searchIndex = $toLower ? 0 : 1;
    $replaceIndex = $toLower ? 1 : 0;

    return (string)\str_replace($FULL_CASE_FOLD[$searchIndex], $FULL_CASE_FOLD[$replaceIndex], $str);
  }
1766
1767
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The string with all known broken sequences replaced; the input is returned
   *                unchanged when the replacement table could not be loaded.
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists are built once and reused on every later call
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // getData() returns false when the data file is missing; never store
        // that in the property, which is meant to hold null or an array.
        $brokenUtf8Fix = self::getData('utf8_fix');
        self::$BROKEN_UTF8_FIX = \is_array($brokenUtf8Fix) ? $brokenUtf8Fix : [];
      }

      // guard again in case the property was filled with a non-array elsewhere
      $fixMap = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixMap);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixMap);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1801
1802
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      // fix every element recursively and keep the original keys
      foreach ($str as $key => $value) {
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    $current = (string)$str;
    $previous = '';

    // decode + re-encode until another pass no longer changes the string
    while ($previous !== $current) {
      $previous = $current;
      $decoded = self::utf8_decode($current, true);
      $current = self::to_utf8($decoded);
    }

    return $current;
  }
1832
1833
  /**
   * Get the text direction of a specific character.
   *
   * The "IntlChar"-Class is asked first (if supported); when it gives no
   * usable answer, a built-in table of right-to-left code points is used.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $codePoint = static::chr_to_decimal($char);

    // everything outside of this window is treated as left-to-right
    if ($codePoint < 0x5be || $codePoint > 0x10b7f) {
      return 'LTR';
    }

    // Table entries: an int is one exact code point, a [from, to] pair is an
    // inclusive range of code points.
    if ($codePoint <= 0x85e) {

      $rtlEntries = [
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          0x608,
          0x60b,
          0x60d,
          0x61b,
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          0x710,
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          0x7b1,
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          0x7fa,
          [0x800, 0x815],
          0x81a,
          0x824,
          0x828,
          [0x830, 0x83e],
          [0x840, 0x858],
          0x85e,
      ];

    } elseif ($codePoint === 0x200f) {

      return 'RTL';

    } elseif ($codePoint >= 0xfb1d) {

      $rtlEntries = [
          0xfb1d,
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          0xfb3e,
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          0x10808,
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          0x1083c,
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          0x1093f,
          0x10a00,
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      return 'LTR';

    }

    foreach ($rtlEntries as $entry) {
      if (\is_array($entry)) {
        if ($entry[0] <= $codePoint && $entry[1] >= $codePoint) {
          return 'RTL';
        }
      } elseif ($entry === $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1955
1956
  /**
   * Load a data set from "/data/*.php" (relative to this file).
   *
   * @param string $file <p>Name of the data file, without directory and without the ".php" suffix.</p>
   *
   * @return mixed|false The value returned by the included file; false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1973
1974
  /**
   * Check for php-support.
   *
   * @param string|null $key <p>Name of a single support-flag, or null for all of them.</p>
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    // make sure the support-info was collected at least once
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown keys fall back to null
    return self::$SUPPORT[$key] ?? null;
  }
2000
2001
  /**
   * Guess a file type from the first two bytes of the given data.
   *
   * @param string $str <p>The (binary) data to inspect.</p>
   *
   * @return string[]
   *                  Array with the keys "ext" and "type": both are empty strings if less than
   *                  two bytes are available and both are "???" if the signature is unknown.
   */
  private static function get_file_type($str)
  {
    if ('' === $str) {
      return ['ext' => '', 'type' => ''];
    }

    $firstBytes = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($firstBytes) !== 2) {
      return ['ext' => '', 'type' => ''];
    }

    // the signature is the decimal value of both bytes written one after another,
    // e.g. "%P" (37, 80) => 3780
    $byteValues = \unpack('C2chars', $firstBytes);
    $signature = (int)($byteValues['chars1'] . $byteValues['chars2']);

    $knownSignatures = [
        3780   => 'pdf',
        7790   => 'exe',
        7784   => 'midi',
        8075   => 'zip',
        8297   => 'rar',
        255216 => 'jpg',
        7173   => 'gif',
        6677   => 'bmp',
        13780  => 'png',
    ];

    if (isset($knownSignatures[$signature])) {
      return ['ext' => $knownSignatures[$signature], 'type' => 'binary'];
    }

    return ['ext' => '???', 'type' => '???'];
  }
2068
2069
  /**
   * Build a random string out of the given characters.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The random string; empty if $possibleChars contains no characters.
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    // pick one random character per iteration
    for ($picked = 0; $picked < $length; ++$picked) {
      try {
        $offset = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // fall back to mt_rand() when random_int() throws
        /** @noinspection RandomApiMigrationInspection */
        $offset = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $offset, 1, $encoding);
    }

    return $result;
  }
2102
2103
  /**
   * Create a unique string based on "uniqid()" plus some request specific entropy.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // random number + session + client / server address + caller supplied entropy
    $entropyParts = [
        \mt_rand(),
        \session_id(),
        $_SERVER['REMOTE_ADDR'] ?? '',
        $_SERVER['SERVER_ADDR'] ?? '',
        $entropyExtra,
    ];
    $uniqueHelper = \implode('', $entropyParts);

    $uniqueString = \uniqid($uniqueHelper, true);

    return $md5 ? \md5($uniqueString . $uniqueHelper) : $uniqueString;
  }
2125
2126
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of "UTF8::string_has_bom()".
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2141
2142
  /**
   * Returns true if the string contains a lower case char, false otherwise.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    // anything, followed by at least one lower case character
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2153
2154
  /**
   * Returns true if the string contains an upper case char, false otherwise.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    // anything, followed by at least one upper case character
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2165
2166
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character, as returned by "UTF8::decimal_to_chr()".
   */
  public static function hex_to_chr(string $hexdec)
  {
    // hex => decimal code point => character
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2177
2178
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted are 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+"
   * (case-insensitive).
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Only real hexadecimal digits are allowed: with "[a-z0-9]" an input like
    // "wxyz" was accepted and silently converted to 0 by intval().
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2202
2203
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
   * @param string $encoding [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
   *
   * @return string The result of "UTF8::html_entity_decode()".
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2218
2219
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // encode everything by default, or only non-ASCII chars if requested
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap must consist of groups of four values
      // (start code, end code, offset, mask); a stray fifth element makes
      // PHP >= 8.0 throw a ValueError.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2272
2273
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Convert all HTML entities to their applicable characters
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>
   *                         The input string.
   *                         </p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
   *                         which document type to use. If null is given, ENT_QUOTES | ENT_HTML5 is used.
   *                         <table>
   *                         Available <i>flags</i> constants
   *                         <tr valign="top">
   *                         <td>Constant Name</td>
   *                         <td>Description</td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_COMPAT</b></td>
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_QUOTES</b></td>
   *                         <td>Will convert both double and single quotes.</td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_NOQUOTES</b></td>
   *                         <td>Will leave both double and single quotes unconverted.</td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_HTML401</b></td>
   *                         <td>
   *                         Handle code as HTML 4.01.
   *                         </td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_XML1</b></td>
   *                         <td>
   *                         Handle code as XML 1.
   *                         </td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_XHTML</b></td>
   *                         <td>
   *                         Handle code as XHTML.
   *                         </td>
   *                         </tr>
   *                         <tr valign="top">
   *                         <td><b>ENT_HTML5</b></td>
   *                         <td>
   *                         Handle code as HTML 5.
   *                         </td>
   *                         </tr>
   *                         </table>
   *                         </p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The decoded string.
   */
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // nothing to decode without "&", or without both "&#" and ";"
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // The support-info must be available before "self::$SUPPORT['mbstring']"
    // is read below, so this check has to run first.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // repeat until a pass no longer changes the string (multiple encoded entities)
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        // The convmap must consist of groups of four values
        // (start code, end code, offset, mask); a stray fifth element makes
        // PHP >= 8.0 throw a ValueError.
        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // quotes stay encoded here, they are handled via $flags below
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (the regex adds the missing semicolon to numeric entities first)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2430
2431
  /**
   * Create an escaped html version of the string via "UTF8::htmlspecialchars()",
   * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2447
2448
  /**
   * Remove empty html-tags, i.e. an opening tag that is directly followed by a
   * closing tag with nothing but optional whitespace in between.
   *
   * e.g.: <tag></tag>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2465
2466
  /**
2467
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2468
   *
2469
   * @link http://php.net/manual/en/function.htmlentities.php
2470
   *
2471
   * @param string $str           <p>
2472
   *                              The input string.
2473
   *                              </p>
2474
   * @param int    $flags         [optional] <p>
2475
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2476
   *                              invalid code unit sequences and the used document type. The default is
2477
   *                              ENT_COMPAT | ENT_HTML401.
2478
   *                              <table>
2479
   *                              Available <i>flags</i> constants
2480
   *                              <tr valign="top">
2481
   *                              <td>Constant Name</td>
2482
   *                              <td>Description</td>
2483
   *                              </tr>
2484
   *                              <tr valign="top">
2485
   *                              <td><b>ENT_COMPAT</b></td>
2486
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2487
   *                              </tr>
2488
   *                              <tr valign="top">
2489
   *                              <td><b>ENT_QUOTES</b></td>
2490
   *                              <td>Will convert both double and single quotes.</td>
2491
   *                              </tr>
2492
   *                              <tr valign="top">
2493
   *                              <td><b>ENT_NOQUOTES</b></td>
2494
   *                              <td>Will leave both double and single quotes unconverted.</td>
2495
   *                              </tr>
2496
   *                              <tr valign="top">
2497
   *                              <td><b>ENT_IGNORE</b></td>
2498
   *                              <td>
2499
   *                              Silently discard invalid code unit sequences instead of returning
2500
   *                              an empty string. Using this flag is discouraged as it
2501
   *                              may have security implications.
2502
   *                              </td>
2503
   *                              </tr>
2504
   *                              <tr valign="top">
2505
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2506
   *                              <td>
2507
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2508
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2509
   *                              </td>
2510
   *                              </tr>
2511
   *                              <tr valign="top">
2512
   *                              <td><b>ENT_DISALLOWED</b></td>
2513
   *                              <td>
2514
   *                              Replace invalid code points for the given document type with a
2515
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2516
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2517
   *                              instance, to ensure the well-formedness of XML documents with
2518
   *                              embedded external content.
2519
   *                              </td>
2520
   *                              </tr>
2521
   *                              <tr valign="top">
2522
   *                              <td><b>ENT_HTML401</b></td>
2523
   *                              <td>
2524
   *                              Handle code as HTML 4.01.
2525
   *                              </td>
2526
   *                              </tr>
2527
   *                              <tr valign="top">
2528
   *                              <td><b>ENT_XML1</b></td>
2529
   *                              <td>
2530
   *                              Handle code as XML 1.
2531
   *                              </td>
2532
   *                              </tr>
2533
   *                              <tr valign="top">
2534
   *                              <td><b>ENT_XHTML</b></td>
2535
   *                              <td>
2536
   *                              Handle code as XHTML.
2537
   *                              </td>
2538
   *                              </tr>
2539
   *                              <tr valign="top">
2540
   *                              <td><b>ENT_HTML5</b></td>
2541
   *                              <td>
2542
   *                              Handle code as HTML 5.
2543
   *                              </td>
2544
   *                              </tr>
2545
   *                              </table>
2546
   *                              </p>
2547
   * @param string $encoding      [optional] <p>
2548
   *                              Like <b>htmlspecialchars</b>,
2549
   *                              <b>htmlentities</b> takes an optional third argument
2550
   *                              <i>encoding</i> which defines encoding used in
2551
   *                              conversion.
2552
   *                              Although this argument is technically optional, you are highly
2553
   *                              encouraged to specify the correct value for your code.
2554
   *                              </p>
2555
   * @param bool   $double_encode [optional] <p>
2556
   *                              When <i>double_encode</i> is turned off PHP will not
2557
   *                              encode existing html entities. The default is to convert everything.
2558
   *                              </p>
2559
   *
2560
   *
2561
   * @return string The encoded string.
2562
   * </p>
2563
   * <p>
2564
   * If the input <i>string</i> contains an invalid code unit
2565
   * sequence within the given <i>encoding</i> an empty string
2566
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2567
   * <b>ENT_SUBSTITUTE</b> flags are set.
2568
   */
2569 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are used verbatim; any other label is passed through
    // normalize_encoding() with 'UTF-8' as second argument (presumably the
    // fallback - confirm against normalize_encoding()).
    $isPassThroughCharset = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isPassThroughCharset) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Native htmlentities() leaves the backslash untouched, so every backslash
    // is converted to its numeric entity afterwards.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    return self::html_encode($encoded, true, $encoding);
  }
2589
2590
  /**
2591
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2592
   *
2593
   * INFO: Take a look at "UTF8::htmlentities()"
2594
   *
2595
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2596
   *
2597
   * @param string $str           <p>
2598
   *                              The string being converted.
2599
   *                              </p>
2600
   * @param int    $flags         [optional] <p>
2601
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2602
   *                              invalid code unit sequences and the used document type. The default is
2603
   *                              ENT_COMPAT | ENT_HTML401.
2604
   *                              <table>
2605
   *                              Available <i>flags</i> constants
2606
   *                              <tr valign="top">
2607
   *                              <td>Constant Name</td>
2608
   *                              <td>Description</td>
2609
   *                              </tr>
2610
   *                              <tr valign="top">
2611
   *                              <td><b>ENT_COMPAT</b></td>
2612
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2613
   *                              </tr>
2614
   *                              <tr valign="top">
2615
   *                              <td><b>ENT_QUOTES</b></td>
2616
   *                              <td>Will convert both double and single quotes.</td>
2617
   *                              </tr>
2618
   *                              <tr valign="top">
2619
   *                              <td><b>ENT_NOQUOTES</b></td>
2620
   *                              <td>Will leave both double and single quotes unconverted.</td>
2621
   *                              </tr>
2622
   *                              <tr valign="top">
2623
   *                              <td><b>ENT_IGNORE</b></td>
2624
   *                              <td>
2625
   *                              Silently discard invalid code unit sequences instead of returning
2626
   *                              an empty string. Using this flag is discouraged as it
2627
   *                              may have security implications.
2628
   *                              </td>
2629
   *                              </tr>
2630
   *                              <tr valign="top">
2631
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2632
   *                              <td>
2633
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2634
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2635
   *                              </td>
2636
   *                              </tr>
2637
   *                              <tr valign="top">
2638
   *                              <td><b>ENT_DISALLOWED</b></td>
2639
   *                              <td>
2640
   *                              Replace invalid code points for the given document type with a
2641
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2642
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2643
   *                              instance, to ensure the well-formedness of XML documents with
2644
   *                              embedded external content.
2645
   *                              </td>
2646
   *                              </tr>
2647
   *                              <tr valign="top">
2648
   *                              <td><b>ENT_HTML401</b></td>
2649
   *                              <td>
2650
   *                              Handle code as HTML 4.01.
2651
   *                              </td>
2652
   *                              </tr>
2653
   *                              <tr valign="top">
2654
   *                              <td><b>ENT_XML1</b></td>
2655
   *                              <td>
2656
   *                              Handle code as XML 1.
2657
   *                              </td>
2658
   *                              </tr>
2659
   *                              <tr valign="top">
2660
   *                              <td><b>ENT_XHTML</b></td>
2661
   *                              <td>
2662
   *                              Handle code as XHTML.
2663
   *                              </td>
2664
   *                              </tr>
2665
   *                              <tr valign="top">
2666
   *                              <td><b>ENT_HTML5</b></td>
2667
   *                              <td>
2668
   *                              Handle code as HTML 5.
2669
   *                              </td>
2670
   *                              </tr>
2671
   *                              </table>
2672
   *                              </p>
2673
   * @param string $encoding      [optional] <p>
2674
   *                              Defines encoding used in conversion.
2675
   *                              </p>
2676
   *                              <p>
2677
   *                              For the purposes of this function, the encodings
2678
   *                              ISO-8859-1, ISO-8859-15,
2679
   *                              UTF-8, cp866,
2680
   *                              cp1251, cp1252, and
2681
   *                              KOI8-R are effectively equivalent, provided the
2682
   *                              <i>string</i> itself is valid for the encoding, as
2683
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2684
   *                              the same positions in all of these encodings.
2685
   *                              </p>
2686
   * @param bool   $double_encode [optional] <p>
2687
   *                              When <i>double_encode</i> is turned off PHP will not
2688
   *                              encode existing html entities, the default is to convert everything.
2689
   *                              </p>
2690
   *
2691
   * @return string The converted string.
2692
   * </p>
2693
   * <p>
2694
   * If the input <i>string</i> contains an invalid code unit
2695
   * sequence within the given <i>encoding</i> an empty string
2696
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2697
   * <b>ENT_SUBSTITUTE</b> flags are set.
2698
   */
2699 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are handed to PHP as-is; every other label is first
    // run through normalize_encoding().
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2707
2708
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    return \extension_loaded('iconv');
  }
2718
2719
  /**
   * Alias for "UTF8::decimal_to_chr()": forwards the argument unchanged and
   * returns whatever that method returns.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed straight to decimal_to_chr().</p>
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    return self::decimal_to_chr($int);
  }
2732
2733
  /**
   * Formats an integer as a prefixed hexadecimal code point (e.g. "U+00f1").
   *
   * The hex digits come from dechex() (lower case) and are left-padded with
   * zeros to a minimum width of four; longer values are kept as they are.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Text placed in front of the hex digits.</p>
   *
   * @return string The prefix followed by the (zero-padded) hex digits.
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2751
2752
  /**
   * Reports whether the "IntlChar" class exists.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    return \class_exists('IntlChar');
  }
2762
2763
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    return \extension_loaded('intl');
  }
2773
2774
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool The result of UTF8::is_ascii().
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    return self::is_ascii($str);
  }
2789
2790
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of UTF8::is_base64().
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str): bool
  {
    return self::is_base64($str);
  }
2805
2806
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str    <p>The value to inspect.</p>
   * @param bool  $strict <p>Passed through as the $strict flag of is_binary().</p>
   *
   * @return bool The result of UTF8::is_binary().
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    return self::is_binary($str, $strict);
  }
2822
2823
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string compared against the known byte order marks.</p>
   *
   * @return bool The result of UTF8::is_bom().
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    return self::is_bom($utf8_chr);
  }
2838
2839
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of UTF8::is_html().
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    return self::is_html($str);
  }
2854
2855
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of UTF8::is_json().
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    return self::is_json($str);
  }
2870
2871
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2889
2890
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2908
2909
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string|string[] $str    <p>The string (or list of strings) to be checked.</p>
   * @param bool            $strict <p>Passed through as the $strict flag of is_utf8().</p>
   *
   * @return bool The result of UTF8::is_utf8().
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    return self::is_utf8($str, $strict);
  }
2925
2926
  /**
   * Tests the string against the pattern "^[[:alpha:]]*$" (alphabetic chars only).
   *
   * NOTE(review): the "*" quantifier admits the empty string, so "" presumably
   * yields true - confirm against str_matches_pattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of str_matches_pattern() for that pattern.
   */
  public static function is_alpha(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:alpha:]]*$');
  }
2938
2939
  /**
   * Tests the string against the pattern "^[[:alnum:]]*$" (alphabetic and numeric chars only).
   *
   * NOTE(review): the "*" quantifier admits the empty string, so "" presumably
   * yields true - confirm against str_matches_pattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of str_matches_pattern() for that pattern.
   */
  public static function is_alphanumeric(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:alnum:]]*$');
  }
2951
2952
  /**
   * Checks that the string consists only of the accepted ASCII bytes.
   *
   * Accepted are 0x20-0x7E plus the bytes 0x09, 0x0A, 0x0D, 0x10 and 0x13;
   * any other byte makes the check fail. The empty string is accepted.
   *
   * NOTE(review): "\x10" and "\x13" are hex escapes (DLE / DC3); they may have
   * been meant as decimal 10 / 13 (LF / CR), which are already covered by
   * "\x0A" / "\x0D" - confirm before changing the pattern.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if no byte outside the accepted set is found<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    // Shortcut: nothing to scan.
    $isEmpty = ($str === '');

    return $isEmpty || !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
  }
2970
2971
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and re-encoded; it only counts as
   * base64 when the re-encoded text is identical to the input. Non-string
   * input and the empty string are rejected.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is base64 encoded.
   */
  public static function is_base64($str): bool
  {
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare explicitly instead of relying on truthiness: a decoded payload
    // of "0" (from "MA==") is falsy in PHP but perfectly valid.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
2992
2993
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input is cast to string and run through these checks in order:
   * - empty string: not binary;
   * - only the digits "0" / "1": binary;
   * - strict mode only: fileinfo reports the MIME encoding "binary";
   * - get_file_type() reports the type "binary";
   * - more than 25.6 % of the bytes are NUL bytes.
   *
   * @param mixed $input  <p>The value to inspect (cast to string).</p>
   * @param bool  $strict <p>Also consult ext-fileinfo.</p>
   *
   * @throws \RuntimeException if $strict is true and the support map reports "finfo" as missing
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // A string made up solely of "0" / "1" digits is treated as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($input);
      if ($detectedEncoding === 'binary') {
        return true;
      }
    }

    $fileType = self::get_file_type($input);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($input);
    if (!$byteLength) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Share of NUL bytes across the whole input.
    $nulBytes = self::substr_count_in_byte($input, "\x0", 0, $byteLength);

    return ($nulBytes / $byteLength) > 0.256;
  }
3050
3051
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to
   * is_binary() in strict mode. A file that cannot be opened, or whose
   * first read yields an empty string, is reported as not binary.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');
    if (!\is_resource($handle)) {
      return false;
    }

    $head = \fread($handle, 512);
    \fclose($handle);

    if ($head === '') {
      return false;
    }

    return self::is_binary($head, true);
  }
3075
3076
  /**
   * Tests the string against the pattern "^[[:space:]]*$" (whitespace chars only).
   *
   * NOTE(review): the "*" quantifier admits the empty string, so "" presumably
   * yields true - confirm against str_matches_pattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of str_matches_pattern() for that pattern.
   */
  public static function is_blank(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:space:]]*$');
  }
3088
3089
  /**
   * Checks if the given string is exactly one of the known "Byte Order Mark" sequences.
   *
   * The comparison is strict and against the whole input: a string that merely
   * starts with a BOM does not match.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is identical to a key of the BOM table, <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    // The BOM byte sequences are the keys of the table; the values (byte lengths) are not needed here.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3109
3110
  /**
   * Determine whether the value is considered to be empty.
   *
   * A value is considered empty when it converts to boolean FALSE
   * (e.g. "", "0", 0, null, false, []).
   *
   * @param mixed $str
   *
   * @return bool Whether or not $str is empty.
   */
  public static function is_empty($str): bool
  {
    return (bool)$str === false;
  }
3124
3125
  /**
   * Tests the string against the pattern "^[[:xdigit:]]*$" (hexadecimal digits only).
   *
   * NOTE(review): the "*" quantifier admits the empty string, so "" presumably
   * yields true - confirm against str_matches_pattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of str_matches_pattern() for that pattern.
   */
  public static function is_hexadecimal(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:xdigit:]]*$');
  }
3137
3138
  /**
   * Check if the string contains anything that looks like an html-tag
   * (opening, closing or self-closing, with optional attributes).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <strong>true</strong> if at least one tag-like sequence is found.
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // Stays empty unless the pattern matched.
    $found = [];
    \preg_match($tagPattern, $str, $found);

    return $found !== [];
  }
3158
3159
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that decodes (via UTF8::json_decode()) to an array or an object
   * without a JSON error counts; scalar documents such as "1" or "true" are
   * reported as false, and so is the empty string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException if the support map reports "json" as missing
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    $isContainer = \is_object($decoded) || \is_array($decoded);
    if (!$isContainer) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3191
3192
  /**
   * Tests the string against the pattern "^[[:lower:]]*$" (lower case chars only).
   *
   * NOTE(review): the "*" quantifier admits the empty string, so "" presumably
   * yields true - confirm against str_matches_pattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of str_matches_pattern() for that pattern.
   */
  public static function is_lowercase(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:lower:]]*$');
  }
3205
3206
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The check is done by attempting to unserialize the input. Class
   * instantiation is disabled for that probe ("allowed_classes" => false), so
   * serialized objects are only materialised as __PHP_Incomplete_Class and no
   * __wakeup() / __destruct() / unserialize hooks of real classes can run.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized.
   */
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, so that one literal has to be recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // The silence operator stays: unserialize() reports malformed input via a
    // notice/warning, and malformed input is the expected "not serialized" case.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3225
3226
  /**
   * Tests the string against the pattern "^[[:upper:]]*$" (upper case chars only).
   *
   * NOTE(review): the "*" quantifier admits the empty string, so "" presumably
   * yields true - confirm against str_matches_pattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of str_matches_pattern() for that pattern.
   */
  public static function is_uppercase(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:upper:]]*$');
  }
3239
3240
  /**
   * Check if the string is UTF-16.
   *
   * Heuristic: input that is_binary() rejects is never UTF-16. Otherwise the
   * BOM is removed and, for each byte order, the data is converted to UTF-8,
   * back, and to UTF-8 again. If that round trip is stable, every entry of
   * count_chars() of the converted text that passes the in_array() check
   * against count_chars() of the input adds one point to that byte order.
   * The byte order with more points wins; a tie means "not UTF-16".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() has return paths that never touch the support map, so make
    // sure it is populated before the 'mbstring' entry is read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Filled on first use with count_chars($str, true); shared by both passes.
    $strChars = [];
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Only a conversion that survives a full round trip is scored.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
3312
3313
  /**
   * Check if the string is UTF-32.
   *
   * Heuristic: input that is_binary() rejects is never UTF-32. Otherwise the
   * BOM is removed and, for each byte order, the data is converted to UTF-8,
   * back, and to UTF-8 again. If that round trip is stable, every entry of
   * count_chars() of the converted text that passes the in_array() check
   * against count_chars() of the input adds one point to that byte order.
   * The byte order with more points wins; a tie means "not UTF-32".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() has return paths that never touch the support map, so make
    // sure it is populated before the 'mbstring' entry is read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Filled on first use with count_chars($str, true); shared by both passes.
    $strChars = [];
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Only a conversion that survives a full round trip is scored.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3385
3386
  /**
3387
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3388
   *
3389
   * @see    http://hsivonen.iki.fi/php-utf8/
3390
   *
3391
   * @param string|string[] $str    <p>The string to be checked.</p>
3392
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3393
   *
3394
   * @return bool
3395
   */
3396 107
  public static function is_utf8($str, bool $strict = false): bool
3397
  {
3398 107
    if (\is_array($str) === true) {
3399 2
      foreach ($str as $k => $v) {
3400 2
        if (false === self::is_utf8($v, $strict)) {
3401 2
          return false;
3402
        }
3403
      }
3404
3405
      return true;
3406
    }
3407
3408 107
    if ('' === $str) {
3409 12
      return true;
3410
    }
3411
3412 103
    if ($strict === true) {
3413 2
      if (self::is_utf16($str) !== false) {
3414 2
        return false;
3415
      }
3416
3417
      if (self::is_utf32($str) !== false) {
3418
        return false;
3419
      }
3420
    }
3421
3422 103
    if (self::pcre_utf8_support() !== true) {
3423
3424
      // If even just the first character can be matched, when the /u
3425
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3426
      // invalid, nothing at all will match, even if the string contains
3427
      // some valid sequences
3428
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3429
    }
3430
3431 103
    $mState = 0; // cached expected number of octets after the current octet
3432
    // until the beginning of the next UTF8 character sequence
3433 103
    $mUcs4 = 0; // cached Unicode character
3434 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3435
3436 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3437
      self::checkForSupport();
3438
    }
3439
3440 103
    if (self::$ORD === null) {
3441
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3442
    }
3443
3444 103
    $len = self::strlen_in_byte((string)$str);
3445
    /** @noinspection ForeachInvariantsInspection */
3446 103
    for ($i = 0; $i < $len; $i++) {
3447 103
      $in = self::$ORD[$str[$i]];
3448 103
      if ($mState === 0) {
3449
        // When mState is zero we expect either a US-ASCII character or a
3450
        // multi-octet sequence.
3451 103
        if (0 === (0x80 & $in)) {
3452
          // US-ASCII, pass straight through.
3453 98
          $mBytes = 1;
3454 84
        } elseif (0xC0 === (0xE0 & $in)) {
3455
          // First octet of 2 octet sequence.
3456 75
          $mUcs4 = $in;
3457 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3458 75
          $mState = 1;
3459 75
          $mBytes = 2;
3460 58
        } elseif (0xE0 === (0xF0 & $in)) {
3461
          // First octet of 3 octet sequence.
3462 41
          $mUcs4 = $in;
3463 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3464 41
          $mState = 2;
3465 41
          $mBytes = 3;
3466 30
        } elseif (0xF0 === (0xF8 & $in)) {
3467
          // First octet of 4 octet sequence.
3468 19
          $mUcs4 = $in;
3469 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3470 19
          $mState = 3;
3471 19
          $mBytes = 4;
3472 13
        } elseif (0xF8 === (0xFC & $in)) {
3473
          /* First octet of 5 octet sequence.
3474
          *
3475
          * This is illegal because the encoded codepoint must be either
3476
          * (a) not the shortest form or
3477
          * (b) outside the Unicode range of 0-0x10FFFF.
3478
          * Rather than trying to resynchronize, we will carry on until the end
3479
          * of the sequence and let the later error handling code catch it.
3480
          */
3481 5
          $mUcs4 = $in;
3482 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3483 5
          $mState = 4;
3484 5
          $mBytes = 5;
3485 10
        } elseif (0xFC === (0xFE & $in)) {
3486
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3487 5
          $mUcs4 = $in;
3488 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3489 5
          $mState = 5;
3490 5
          $mBytes = 6;
3491
        } else {
3492
          // Current octet is neither in the US-ASCII range nor a legal first
3493
          // octet of a multi-octet sequence.
3494 103
          return false;
3495
        }
3496
      } else {
3497
        // When mState is non-zero, we expect a continuation of the multi-octet
3498
        // sequence
3499 84
        if (0x80 === (0xC0 & $in)) {
3500
          // Legal continuation.
3501 76
          $shift = ($mState - 1) * 6;
3502 76
          $tmp = $in;
3503 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3504 76
          $mUcs4 |= $tmp;
3505
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3506
          // Unicode code point to be output.
3507 76
          if (0 === --$mState) {
3508
            // Check for illegal sequences and code points.
3509
            //
3510
            // From Unicode 3.1, non-shortest form is illegal
3511
            if (
3512 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3513
                ||
3514 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3515
                ||
3516 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3517
                ||
3518 76
                (4 < $mBytes)
3519
                ||
3520
                // From Unicode 3.2, surrogate characters are illegal.
3521 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3522
                ||
3523
                // Code points outside the Unicode range are illegal.
3524 76
                ($mUcs4 > 0x10FFFF)
3525
            ) {
3526 8
              return false;
3527
            }
3528
            // initialize UTF8 cache
3529 76
            $mState = 0;
3530 76
            $mUcs4 = 0;
3531 76
            $mBytes = 1;
3532
          }
3533
        } else {
3534
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3535
          // Incomplete multi-octet sequence.
3536 36
          return false;
3537
        }
3538
      }
3539
    }
3540
3541 67
    return true;
3542
  }
3543
3544
  /**
   * Decodes a JSON string via PHP's native "json_decode()".
   *
   * The input is first passed through "UTF8::filter()"; the filtered value is what gets decoded.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded (expected to be UTF-8).</p>
   * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.</p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options, handed to "json_decode()" as-is.</p>
   *
   * @throws \RuntimeException if the support table reports that "ext-json" is not available
   *
   * @return mixed
   *                The decoded value in the appropriate PHP type, exactly as returned by "json_decode()"
   *                (<b>NULL</b> if the <i>json</i> cannot be decoded or is nested deeper than <i>$depth</i>).
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // make sure the feature-detection table is populated before it is read
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] !== false) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      return \json_decode($filteredJson, $assoc, $depth, $options);
    }

    throw new \RuntimeException('ext-json: is not installed');
  }
3596
3597
  /**
   * Returns the JSON representation of a value via PHP's native "json_encode()".
   *
   * The value is first passed through "UTF8::filter()"; the filtered value is what gets encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource;
   *                       all string data must be UTF-8 encoded.</p>
   * @param int   $options [optional] <p>Bitmask of JSON encode options (e.g. <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>), handed to "json_encode()" as-is.</p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @throws \RuntimeException if the support table reports that "ext-json" is not available
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filteredValue = self::filter($value);

    // make sure the feature-detection table is populated before it is read
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] !== false) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      return \json_encode($filteredValue, $options, $depth);
    }

    throw new \RuntimeException('ext-json: is not installed');
  }
3652
3653
  /**
   * Reports whether JSON support is available, determined by the existence
   * of the native "json_decode()" function.
   *
   * @return bool
   *              <strong>true</strong> if "json_decode()" exists, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3663
3664
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset handed to the underlying substr / strtolower helpers.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The first character in lower case, followed by the remainder of the string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character (substr may report "false", which is treated as empty)
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharLower = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $firstCharLower . ($remainder === false ? '' : $remainder);
  }
3688
3689
  /**
   * Alias of "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset, passed through to "lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "lcfirst()".</p>
   *
   * @return string The result of "UTF8::lcfirst()".
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3704
3705
  /**
   * Lower-cases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each word that is not listed in
   * <i>$exceptions</i> is passed through "UTF8::lcfirst()", and the pieces are glued
   * together again without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose check ("!$str") would also discard the string "0".
    if ('' === $str) {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip only real "nothing" (empty string / null / false), never the word "0".
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3754
3755
  /**
   * Alias of "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset, passed through to "lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "lcfirst()".</p>
   *
   * @return string The result of "UTF8::lcfirst()".
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3770
3771
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without explicit <i>$chars</i> (the INF default, or an empty value), every leading
   * character of the Unicode categories "Z" (separators) and "C" (control / other) is removed.
   * Otherwise <i>$chars</i> is regex-quoted and used as the set of characters to strip.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a legitimate character to strip, but PHP treats the string "0" as falsy;
    // exclude it explicitly so that e.g. ltrim('007', '0') does not fall back to whitespace.
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern === true) {
      $pattern = '^[\pZ\pC]+';
    } else {
      $pattern = '^[' . \preg_quote($chars, '/') . ']+';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3795
3796
  /**
   * Returns the UTF-8 character with the highest code point in the given data.
   *
   * An array argument is concatenated into one string before its code points are collected.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character built from the highest code point, or null if no code points were found.
   */
  public static function max($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) < 1) {
      return null;
    }

    return self::chr(\max($codepoints));
  }
3818
3819
  /**
   * Returns the largest entry of the per-character size list that
   * "UTF8::chr_size_list()" produces for the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int The maximum value of the size list, or 0 if that list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizeList = self::chr_size_list($str);

    if (\count($sizeList) === 0) {
      return 0;
    }

    return (int)\max($sizeList);
  }
3836
3837
  /**
   * Checks whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3853
3854
  /**
   * Checks whether the mbstring function "overloading" of string functions is active.
   *
   * The result is true only if the constant MB_OVERLOAD_STRING is defined and its bit
   * is set in the "mbstring.func_overload" INI value.
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // without the constant there is nothing to test the INI value against
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3871
3872
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * An array argument is concatenated into one string before its code points are collected.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character built from the lowest code point, or null if no code points were found.
   */
  public static function min($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) < 1) {
      return null;
    }

    return self::chr(\min($codepoints));
  }
3894
3895
  /**
   * Alias of "UTF8::normalize_encoding()": forwards both arguments unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>The encoding name to normalize.</p>
   * @param mixed $fallback <p>Passed through to "normalize_encoding()".</p>
   *
   * @return mixed The result of "UTF8::normalize_encoding()".
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3911
3912
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order:
   * 1. empty input, "1" or "0" => <i>$fallback</i>
   * 2. a few hard-wired names ("UTF-8"/"UTF8", "8BIT"/"BINARY", "HTML"/"HTML-ENTITIES")
   * 3. the per-request static cache
   * 4. an exact match in the list loaded via getData('encodings')
   * 5. the upper-cased name, mapped through the alias table below when an alias matches
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (
        '8BIT' === $encoding
        ||
        'BINARY' === $encoding
    ) {
      return 'CP850';
    }

    if (
        'HTML' === $encoding
        ||
        'HTML-ENTITIES' === $encoding
    ) {
      return 'HTML-ENTITIES';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Static analysis flags getData() as possibly returning false instead of an array.
    // Only store a real array, so the property keeps its declared "null|array" type and
    // in_array() below never receives a non-array haystack (a TypeError under strict_types).
    if (\is_array(self::$ENCODINGS) === false) {
      $encodingsData = self::getData('encodings');
      if (\is_array($encodingsData) === true) {
        self::$ENCODINGS = $encodingsData;
      }
    }
    $knownEncodings = \is_array(self::$ENCODINGS) === true ? self::$ENCODINGS : [];

    if (\in_array($encoding, $knownEncodings, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // lookup key for the alias table: punctuation such as "-" and "_" is removed
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // no alias found => the upper-cased input is returned as-is
    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cached under the caller's original spelling
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
4053
4054
  /**
   * Converts Windows ("\r\n") and old Mac ("\r") line endings to the unix-like "\n".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with every "\r\n" and every remaining "\r" replaced by "\n".
   */
  public static function normalize_line_ending(string $str): string
  {
    // order matters: "\r\n" must be handled before the lone "\r"
    $lineEndings = ["\r\n", "\r"];

    return (string)\str_replace($lineEndings, "\n", $str);
  }
4065
4066
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement table is loaded via getData('utf8_msword'); its keys are searched for
   * and replaced by the corresponding values. Keys and values are cached in static
   * variables after the first successful load.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string The string with all table keys replaced; the unchanged input if the table could not be loaded.
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      // Static analysis flags getData() as possibly returning false instead of an array.
      // Only store a real array, so the property keeps its declared "null|array" type and
      // array_keys() / array_values() never receive a non-array (a TypeError under strict_types).
      if (\is_array(self::$UTF8_MSWORD) === false) {
        $mswordData = self::getData('utf8_msword');
        if (\is_array($mswordData) === false) {
          // nothing to replace with; the caches stay empty so that a later call retries the load
          return $str;
        }

        self::$UTF8_MSWORD = $mswordData;
      }

      $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4094
4095
  /**
   * Replace the special whitespace characters of the internal whitespace table with a plain space.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep the bidirectional text chars,
   *                                        otherwise they are removed from the string.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ('' === $str) {
      return '';
    }

    // one search list per "keep non-breaking space" mode
    static $SEARCH_CACHE = [];
    static $BIDI_CACHE = null;

    $mode = (int)$keepNonBreakingSpace;

    if (!isset($SEARCH_CACHE[$mode])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $SEARCH_CACHE[$mode] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_CACHE === null) {
        $BIDI_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // bidi controls are dropped completely, not replaced by a space
      $str = \str_replace($BIDI_CACHE, '', $str);
    }

    return \str_replace($SEARCH_CACHE[$mode], ' ', $str);
  }
4137
4138
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per "character + normalized encoding", the (possibly expensive)
   * charset conversion is only done on a cache miss.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read (e.g. empty input).
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    // init
    $chr = (string)$chr;

    static $CHAR_CACHE = [];

    // the input is only converted when the (normalized) encoding is still not UTF-8
    $needsConversion = false;
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
      $needsConversion = ($encoding !== 'UTF-8');
    }

    // the cache key is built from the original (unconverted) character
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($needsConversion === true) {
      $chr = self::encode($encoding, $chr);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CHAR_CACHE[$cacheKey] = $code;
      }
    }

    // fallback: decode the first (max. 4) bytes by hand, "unpack()" returns a 1-based array
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $code && isset($bytes[4])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    }

    // 3-byte sequence
    if (0xE0 <= $code && isset($bytes[3])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    }

    // 2-byte sequence
    if (0xC0 <= $code && isset($bytes[2])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xC0) << 6) + $bytes[2] - 0x80);
    }

    // single byte (or nothing at all)
    return $CHAR_CACHE[$cacheKey] = $code;
  }
4207
4208
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never sets variables in the current scope,
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string or if $result is empty.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without "mbstring" the native function is used
    if (self::$SUPPORT['mbstring'] !== true) {
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4244
4245
  /**
   * Checks if the "u" modifier (Unicode support in PCRE) is available.
   *
   * @return bool
   *              <strong>true</strong> if an empty pattern with the "u" modifier can be evaluated,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // "preg_match()" returns false (plus a silenced warning) if the modifier is not supported
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @\preg_match('//u', '');

    return (bool)$matchResult;
  }
4257
4258
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits only => decimal code point,
   * hexadecimal digits only => hexadecimal code point, anything else => code point of the character.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] <p>An empty array if one of the boundaries is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Resolve one boundary into a code point; the value is cast to a string once,
    // so that the "ctype_*" functions never receive a non-string under "strict_types".
    $toCodePoint = static function ($var): int {
      $value = (string)$var;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($value)) {
        return (int)$value;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($value)) {
        return (int)self::hex_to_int($value);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4314
4315
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string does not change anymore).</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities,
    // so that the entity decoding below can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4363
4364
  /**
   * Filter a list of strings and return the remaining values as a re-indexed list.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values which are empty after "trim()".</p>
   * @param int   $removeShortValues <p>Drop values with a length lower or equal to this value,
   *                                 null disables this check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $keep = static function ($value) use ($removeEmptyValues, $removeShortValues): bool {
      // too short?
      if ($removeShortValues !== null && self::strlen($value) <= $removeShortValues) {
        return false;
      }

      // only whitespace?
      return !($removeEmptyValues === true && \trim($value) === '');
    };

    // "array_filter()" keeps the keys, the caller gets a plain list
    return \array_values(\array_filter($strings, $keep));
  }
4398
4399
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always evaluated with the "u" (UTF-8) modifier.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used, "msr" is handled like "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // "msr" is no valid PCRE modifier combination, it is mapped to "ms"
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback for an empty delimiter
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4429
4430
  /**
   * Alias for "UTF8::remove_bom()", the call is forwarded unchanged.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4445
4446
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the internal BOM table is checked against the beginning of the string,
   * a matching entry is cut off by its configured byte-length.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bom => $bomLength) {
      // only a BOM at the very beginning of the string is removed
      if (self::strpos_in_byte($str, $bom, 0) !== 0) {
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomLength, $remainingBytes);
      if ($stripped === false) {
        return '';
      }

      $remainingBytes -= $bomLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4475
4476
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what is collapsed into one single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    // a single needle is handled like a list with one entry
    $needles = \is_string($what) === true ? [$what] : $what;

    if (\is_array($needles) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($needles as $needle) {
        $repeated = '/(' . \preg_quote($needle, '/') . ')+/';
        $str = (string)\preg_replace($repeated, $needle, $str);
      }
    }

    return $str;
  }
4499
4500
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
   *                              Default: '' (every tag is stripped)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4514
4515
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * INFO: "\r\n" counts as one break, and only real "<br>" tags (with or without attributes /
   *       self-closing slash) are matched, other tags which start with "br" are kept.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // - "\r\n" is tried first, so that a windows line ending is replaced only once
    // - "\b" prevents matches like "<brand>", "[^>]*" stops at the end of the "<br>" tag
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
4527
4528
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" - "%1F") of these
   *                            characters, the hex digits are matched case-insensitive.</p>
   * @param string $replacement [optional] <p>The replacement for every match. Default: ''</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding can use upper- or lowercase hex digits ("%0B" === "%0b")
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is found anymore, because a removal can build a new match (e.g. "%0%000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4561
4562
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    // nothing to do without the prefix
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4585
4586
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (self::str_ends_with($str, $substring)) {

      // the lengths are calculated with $encoding, so the cut must use the same encoding
      // (same as in "UTF8::remove_left()")
      return (string)self::substr(
          $str,
          0,
          self::strlen($str, $encoding) - self::strlen($substring, $encoding),
          $encoding
      );
    }

    return $str;
  }
4608
4609
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    // case-sensitive => "str_replace()", otherwise => "str_ireplace()"
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4627
4628
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    // case-sensitive => "str_replace()", otherwise => "str_ireplace()"
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4646
4647
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character (or string), an empty string removes
   *                                   the chars.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
      // an arbitrary replacement string is not valid here:
      // - empty replacement     => invalid bytes are removed ("none")
      // - non-empty replacement => invalid bytes become U+FFFD, which is replaced by $replacementChar below
      $replacementCharHelper = ($replacementChar === '') ? 'none' : 0xFFFD;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character($replacementCharHelper);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      // restore the previous (global) setting
      \mb_substitute_character($save);

      if (\is_string($strTmp)) {
        $str = $strTmp;
      } else {
        $str = '';
      }
    }

    return str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4703
4704
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped, without a value (INF, null, false, '' or [])
   *                      the unicode separator and control characters are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // explicit checks instead of "!$chars", so that e.g. '0' is a valid character to strip
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === [];

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote((string)$chars, '/');
      // "\z" only matches at the very end, "$" would also match before a trailing newline
      $pattern = "[$chars]+\\z";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4728
4729
  /**
   * Build a regex fragment which matches one of the characters from $s.
   *
   * Short characters are collected (quoted) in one character class, longer ones are added
   * as alternatives. The result is cached per "$s + $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>A character class, or a non-capturing group of alternatives.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASS_CACHE[$cacheKey])) {
      return $RX_CLASS_CACHE[$cacheKey];
    }

    // index 0 holds the content of the character class, all other entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // the hyphen is moved to the front of the class
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === \count($parts))
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASS_CACHE[$cacheKey] = $return;

    return $return;
  }
4777
4778
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of the internal support list is printed as "key - value" inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $output .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4793
4794
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity, or an empty string for an empty input.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    // ASCII chars are returned as they are, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4823
4824
  /**
   * Replace every run of $tabLength spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces which build one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4834
4835
  /**
   * Convert a string to an array of Unicode characters.
   *
   * An array input is processed element by element (with the same $length and $cleanUtf8),
   * the result has the same keys as the input.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string,
   *                  an empty array for an empty string or if $length is lower than 1.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // pass all options to the nested call, so that $cleanUtf8 is not lost for arrays
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "s" => the dot also matches newlines
      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk over the bytes and collect well-formed 1-4 byte sequences,
      // bytes which do not fit into one of these sequences are skipped

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 1 byte (ASCII)
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 2 bytes
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 3 bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 4 bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue $length characters together into one array element
      $ret = \array_chunk($ret, $length);

      return \array_map(
          function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
4967
4968
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // the separator(s) are dropped, the following character (if any) is upper-cased
    $upperAfterSeparator = function ($found) use ($encoding) {
      return isset($found[1]) ? UTF8::strtoupper($found[1], $encoding) : '';
    };

    // the digits are kept, the whole match (digits + following character) is upper-cased
    $upperAfterDigits = function ($found) use ($encoding) {
      return UTF8::strtoupper($found[0], $encoding);
    };

    $camelized = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are removed without upper-casing the next character
    $camelized = (string)\preg_replace('/^[-_]+/', '', $camelized);
    $camelized = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $camelized);
    $camelized = (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $camelized);

    return $camelized;
  }
5005
5006
  /**
   * Capitalizes the first letter of every word of a personal name, leaving
   * words that should not be capitalized untouched.
   *
   * Whitespace is collapsed first; the name is then processed once split on
   * spaces and once split on dashes.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);
    $capitalizedBySpace = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($capitalizedBySpace, '-');
  }
5023
5024
  /**
   * Capitalizes the parts of a personal name, e.g. "marcus aurelius" typed in
   * lowercase, split on the given delimiter.
   *
   * A part is left as typed when it is exactly one of the known lowercase
   * particles, when it starts with one of the known prefixes, or - for the
   * "-" delimiter only - when it starts with one of the particles.
   *
   * @param string $names
   * @param string $delimiter
   * @param string $encoding
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $parts = \explode($delimiter, $names);
    if ($parts === false) {
      return '';
    }

    // Particles that stay untouched when they make up a whole part.
    $particles = [
        'ab',
        'af',
        'al',
        'and',
        'ap',
        'bint',
        'binte',
        'da',
        'de',
        'del',
        'den',
        'der',
        'di',
        'dit',
        'ibn',
        'la',
        'mac',
        'nic',
        'of',
        'ter',
        'the',
        'und',
        'van',
        'von',
        'y',
        'zu',
    ];

    // Beginnings that mark a part as "leave as typed".
    $prefixes = [
        'al-',
        "d'",
        'ff',
        "l'",
        'mac',
        'mc',
        'nic',
    ];

    // When splitting on dashes the particles also count as blocking beginnings.
    $blockedBeginnings = $delimiter === '-' ? \array_merge($particles, $prefixes) : $prefixes;

    foreach ($parts as $position => $part) {
      if (\in_array($part, $particles, true)) {
        continue;
      }

      foreach ($blockedBeginnings as $beginning) {
        if (self::strpos($part, $beginning, 0, $encoding) === 0) {
          continue 2;
        }
      }

      $parts[$position] = self::str_upper_first($part);
    }

    return \implode($delimiter, $parts);
  }
5112
5113
  /**
   * Tells whether $haystack contains $needle.
   *
   * The search is case-sensitive unless $caseSensitive is falsy. An empty
   * haystack or an empty needle always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // BC shim: a non-boolean third argument is taken over as the encoding.
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5142
5143
  /**
   * Tells whether $haystack contains every one of $needles.
   *
   * The search is case-sensitive unless $caseSensitive is falsy. An empty
   * haystack or an empty needle list always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needles === []) {
      return false;
    }

    // BC shim: a non-boolean third argument is taken over as the encoding.
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      if (self::str_contains($haystack, $candidate, $caseSensitive, $encoding) === false) {
        return false;
      }
    }

    return true;
  }
5178
5179
  /**
   * Tells whether $haystack contains at least one of $needles.
   *
   * The search is case-sensitive unless $caseSensitive is falsy. An empty
   * needle list always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($needles === []) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found) {
        return true;
      }
    }

    return false;
  }
5206
5207
  /**
   * Returns a lowercase, trimmed, dash-separated version of the string.
   *
   * Shorthand for self::str_delimit() with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dashed = self::str_delimit($str, '-', $encoding);

    return $dashed;
  }
5221
5222
  /**
   * Returns a lowercase, trimmed version of the string whose parts are
   * separated by $delimiter.
   *
   * A delimiter is placed in front of every ASCII capital that directly
   * follows a word character, and in place of every run of dashes,
   * underscores and whitespace. The delimiter is inserted after lowercasing,
   * so alpha delimiters keep their case. It is handed to preg_replace() as
   * the replacement string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // Mark inner capitals with a dash so they are treated like separators below.
    $marked = (string)\preg_replace('/\B([A-Z])/u', '-\1', self::trim($str));
    $lowered = self::strtolower($marked, $encoding);

    // Collapse each separator run into a single delimiter.
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $lowered);
  }
5244
5245
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order: binary check (UTF-16 / UTF-32), ASCII, UTF-8,
   * "mb_detect_encoding()" with a fixed candidate list, and finally an
   * "iconv()" round-trip over the encodings loaded via self::getData().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // evaluate each check only once, the result is compared against both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() can return false; store an empty list then, so that the
      // property stays an array and the loop below never iterates over false
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5371
5372
  /**
   * Check if the string ends with the given substring (byte-wise,
   * case-sensitive comparison).
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Take as many trailing bytes as the needle has and compare them verbatim.
    $needleBytes = \strlen($needle);
    $tail = \substr($haystack, -$needleBytes);

    return $tail === $needle;
  }
5388
5389
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5413
5414
  /**
   * Makes sure the string begins with $substring, prepending it when
   * self::str_starts_with() reports that it does not.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring) ? $str : $substring . $str;
  }
5431
5432
  /**
   * Makes sure the string ends with $substring, appending it when
   * self::str_ends_with() reports that it does not.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring) ? $str : $str . $substring;
  }
5448
5449
  /**
   * Makes an identifier human-readable: every '_id' is removed, remaining
   * underscores become spaces, and the trimmed result is passed through
   * self::ucfirst().
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // Order matters: '_id' has to be handled before the plain underscore.
    $search = ['_id', '_'];
    $replacement = ['', ' '];

    $readable = self::str_replace($search, $replacement, $str);

    return self::ucfirst(self::trim($readable));
  }
5473
5474
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of $haystack is measured in characters, not bytes: the upper
   * and lower case forms of one character can differ in byte length, so a
   * byte-based cut could split a character and miss a real match.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    $tail = false;

    $needleLength = self::strlen($needle, 'UTF-8');
    if ($needleLength !== false && $needleLength > 0) {
      $tail = self::substr($haystack, -$needleLength, null, 'UTF-8');
    }

    if ($tail === false) {
      // character-based extraction was not possible: fall back to the byte-wise tail
      $tail = \substr($haystack, -\strlen($needle));
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
5494
5495
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5519
5520
  /**
   * Case-insensitive lookup of the first occurrence of $needle.
   *
   * Thin wrapper that forwards all arguments to self::stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5542
5543
  /**
   * Case-insensitive lookup of the last occurrence of $needle.
   *
   * Thin wrapper that forwards all arguments to self::strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5566
5567
  /**
   * Case-sensitive lookup of the first occurrence of $needle.
   *
   * Thin wrapper that forwards all arguments to self::strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5589
5590
  /**
   * Case-sensitive lookup of the last occurrence of $needle.
   *
   * Thin wrapper that forwards all arguments to self::strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5613
5614
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged when $index lies behind its end or when
   * its length cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // strlen() may report false; that value must never reach substr() as a length
    if ($len === false || $index > $len) {
      return $str;
    }

    // substr() may report false as well, the casts turn that into an empty part
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5637
5638
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive UTF-8 regex
   * and the replacement is delegated to preg_replace().
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = \array_map(
        static function ($term) {
          $term = (string)$term;

          // An empty term gets a pattern that can never match, so it replaces nothing.
          if ($term === '') {
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($term, '/') . '/ui';
        },
        (array)$search
    );

    return \preg_replace($patterns, $replace, $subject, -1, $count);
  }
5681
5682
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A match counts only when it sits at the very beginning.
    return self::stripos($haystack, $needle) === 0;
  }
5702
5703
  /**
   * Tells whether the string begins with at least one of $substrings.
   *
   * - case-insensitive
   *
   * An empty string or an empty substring list always yields false.
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5731
5732
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for measuring and cutting below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5763
5764
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for measuring and cutting below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5795
5796
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5822
5823
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5849
5850
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to self::stristr(); an empty input, an empty needle or a
   * failed lookup yields an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5882
5883
  /**
   * Returns the substring after (or, with "$beforeNeedle", before) the last occurrence
   * of "$needle", as reported by "UTF8::strrichr()".
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The found part, or an empty string if an argument is empty or nothing was found.</p>
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
5910
5911
  /**
   * Returns the trailing "$n" characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The trailing characters, or an empty string if "$n" is not positive or nothing could be extracted.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n < 1) {
      return '';
    }

    // a negative start offset counts from the end of the string
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5930
5931
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than "$length", it is cut so that the kept part plus
   * "$strAddOn" is "$length" characters long; shorter strings are returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The limited string, or an empty string if "$str" is empty or "$length" is not positive.</p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the input string.
    $keep = $length - (int)self::strlen($strAddOn, $encoding);

    // If the add-on is at least as long as the limit there is no room for any
    // character of "$str"; a negative length would make substr() cut from the
    // end of the string instead.
    $head = $keep > 0 ? (string)self::substr($str, 0, $keep, $encoding) : '';

    return $head . $strAddOn;
  }
5957
5958
  /**
   * Limit the number of characters in a string, cutting at a word boundary
   * (a space) where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The limited string, or an empty string if "$str" is empty or "$length" is not positive.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The character right at the limit is a space: cut just before it.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return (string)self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // Otherwise drop the (possibly incomplete) last word of the truncated string.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // No word boundary inside the limit: fall back to a hard cut.
    return (string)self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
5999
6000
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared leading characters, possibly an empty string.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // only the overlap of both strings can be part of a common prefix
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = self::substr($str, $i, 1, $encoding);

      // strict comparison: characters must be identical, no type juggling
      if ($char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= (string)$char;
    }

    return $longestCommonPrefix;
  }
6026
6027
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if either input is empty.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of "$otherStr" once instead of once per table cell.
    $otherChars = [];
    for ($j = 0; $j < $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match so far
    $end = 0; // end position (exclusive) of the best match inside "$str"

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // every cell starts at 0, so only matching cells need to be written.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar !== $otherChars[$j - 1]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // strictly greater: on ties the first occurrence wins
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6078
6079
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared trailing characters, possibly an empty string.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // only the overlap of both strings can be part of a common suffix
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // negative offsets walk both strings backwards from their last character
      $char = self::substr($str, -$i, 1, $encoding);

      // strict comparison: characters must be identical, no type juggling
      if ($char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      $longestCommonSuffix = (string)$char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
6105
6106
  /**
   * Checks whether $str matches the supplied regex pattern.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match and 0 / false otherwise
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
6122
6123
  /**
   * Tells whether a character exists at the given index. Negative offsets
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    return $offset >= 0
        ? $length > $offset           // valid positive indexes are 0 .. length - 1
        : $length >= \abs($offset);   // valid negative indexes are -1 .. -length
  }
6146
6147
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Bounds are checked with the caller's encoding, i.e. with the same
    // encoding that is used to fetch the character below.
    if (!self::str_offset_exists($str, $index, $encoding)) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6176
6177
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * An empty input string is returned as an empty string, and a string that is
   * already longer than "$pad_length" is returned unchanged. An empty
   * "$pad_string" cannot pad anything, so the input is returned unchanged too.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string.
   *
   * @throws \InvalidArgumentException If "$pad_type" is neither an integer nor one of the accepted strings.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // translate the string aliases into the native STR_PAD_* constants
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = (int)self::strlen($str, $encoding);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // nothing to pad with; also prevents a division by zero below
      return $str;
    }

    $diff = ($pad_length - $str_length);

    // Repeat the pad string often enough to cover the gap, then cut it to the exact size.
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // an odd gap puts the extra character on the right side
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
6248
6249
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. If the missing length is odd, the right side gets
   * the extra padding.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6266
6267
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure the input with the caller's encoding, like str_pad_both() does
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6282
6283
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure the input with the caller's encoding, like str_pad_both() does
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6298
6299
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated. It has to be greater than or equal to 0;
   *                           with 0 the function returns an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6323
6324
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle).
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value for found search values.
   *                       An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack).
   *                       If subject is an array, the replacement is performed on every
   *                       entry and the return value is an array as well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6357
6358
  /**
   * Replaces $search at the beginning of the string with $replacement.
   *
   * Both "$search" and "$replacement" are treated literally: the search term is
   * quoted for the regex, and backslashes as well as "$" in the replacement are
   * escaped so that they are not interpreted as backreferences (e.g. "$0", "\1").
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    return self::regex_replace(
        $str,
        '^' . \preg_quote($search, '/'),
        \strtr($replacement, ['\\' => '\\\\', '$' => '\\$'])
    );
  }
6375
6376
  /**
   * Replaces $search at the ending of the string with $replacement.
   *
   * Both "$search" and "$replacement" are treated literally: the search term is
   * quoted for the regex, and backslashes as well as "$" in the replacement are
   * escaped so that they are not interpreted as backreferences (e.g. "$0", "\1").
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // NOTE(review): without the "D" modifier the "$" anchor also matches right
    // before a final newline - confirm whether that is intended here.
    return self::regex_replace(
        $str,
        \preg_quote($search, '/') . '$',
        \strtr($replacement, ['\\' => '\\\\', '$' => '\\$'])
    );
  }
6393
6394
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The string to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string being searched and replaced on.</p>
   *
   * @return string <p>The subject with the first occurrence replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $pos = self::strpos($subject, $search);
    if ($pos === false) {
      return $subject;
    }

    // strlen() may report false and substr_replace() is declared for string|string[];
    // the casts keep the length an integer and the result a string.
    return (string)self::substr_replace($subject, $replace, $pos, (int)self::strlen($search));
  }
6412
6413
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The string to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string being searched and replaced on.</p>
   *
   * @return string <p>The subject with the last occurrence replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $pos = self::strrpos($subject, $search);
    if ($pos === false) {
      return $subject;
    }

    // strlen() may report false and substr_replace() is declared for string|string[];
    // the casts keep the length an integer and the result a string.
    return (string)self::substr_replace($subject, $replace, $pos, (int)self::strlen($search));
  }
6431
6432
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // range(0, -1) would yield the two bogus indexes [0, -1]
    if ('' === $str) {
      return '';
    }

    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // rebuild the string by picking the characters in the shuffled order
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= (string)self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6454
6455
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // NOTE(review): the length arithmetic below assumes a non-negative $start;
    // confirm how negative start offsets are supposed to combine with $end.
    if ($end === null) {
      // the full length is always enough to reach the end of the string
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      // translate the negative end index into a length relative to $start
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6484
6485
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $snake = \str_replace('-', '_', self::normalize_whitespace($str));

    // Digits get wrapped in underscores, every other match is lower-cased and
    // prefixed with an underscore. Inside the character class the "|" is a
    // literal pipe, so pipes are matched as well.
    $snake = (string)\preg_replace_callback(
        '/([\d|A-Z])/u',
        static function ($matches) use ($encoding) {
          $char = $matches[1];

          if ((string)(int)$char == $char) {
            return '_' . $char . '_';
          }

          return '_' . self::strtolower($char, $encoding);
        },
        $snake
    );

    $patterns = [
        '/\s+/',        // convert spaces to "_"
        '/^\s+|\s+$/',  // trim leading & trailing spaces
        '/_+/',         // remove double "_"
    ];
    $replacements = ['_', '', '_'];
    $snake = (string)\preg_replace($patterns, $replacements, $snake);

    // trim leading & trailing "_" first, then leading & trailing whitespace
    return self::trim(self::trim($snake, '_'));
  }
6532
6533
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves one entry per distinct code point
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);

      return self::string($codePoints);
    }

    \arsort($codePoints);

    return self::string($codePoints);
  }
6558
6559
  /**
   * Alias for "UTF8::split()".
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str
   * @param int             $len
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    $chunks = self::split($str, $len);

    return $chunks;
  }
6573
6574
  /**
   * Splits the string at every occurrence of "$pattern" and returns the parts
   * as an array of strings. The pattern is quoted via "preg_quote()" before it
   * is used, so it is matched literally. An optional integer $limit truncates
   * the results.
   *
   * @param string $str
   * @param string $pattern <p>The separator at which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // an empty pattern cannot split anything
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the remaining unsplit string into the last element when
    // a limit is given, so ask for one element more and drop it afterwards
    $limit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
    if ($parts === false) {
      return [];
    }

    if ($limit > 0 && \count($parts) === $limit) {
      \array_pop($parts);
    }

    return $parts;
  }
6617
6618
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is case-sensitive and byte-wise. An empty haystack or an
   * empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // Compare only the leading bytes instead of searching the whole haystack.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6638
6639
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with any of the $substrings.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if ('' === $str || [] === $substrings) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6667
6668
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   *                The part of $str behind the first $separator, or an empty string if
   *                $str / $separator is empty or the separator does not occur.
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the same encoding that is used for the length
    // calculation and the extraction below, so that all offsets refer to the
    // same character units.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6699
6700
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   *                The part of $str behind the last $separator, or an empty string if
   *                $str / $separator is empty or the separator does not occur.
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Search with the caller's encoding so the offset matches the units used
    // by strlen() and substr() below.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6731
6732
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   *                The part of $str in front of the first $separator, or an empty string if
   *                $str / $separator is empty or the separator does not occur.
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Search with the caller's encoding so the offset matches the units used
    // by substr() below.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
6763
6764
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   *                The part of $str in front of the last $separator, or an empty string if
   *                $str / $separator is empty or the separator does not occur.
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Search with the caller's encoding so the offset matches the units used
    // by substr() below.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
6795
6796
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Thin wrapper around UTF8::strstr() that turns a "not found" result into an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string The matching part, or an empty string for empty input or when $needle is not found.
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6828
6829
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * Thin wrapper around UTF8::strrchr() that turns a "not found" result into an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string The matching part, or an empty string for empty input or when $needle is not found.
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6856
6857
  /**
   * Wraps $str in the given substring.
   *
   * @param string $str       <p>The string to wrap.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6869
6870
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * A "word" is every run of non-whitespace characters. Words that are listed
   * in $ignore (exact, case-sensitive match) are left untouched; every other
   * word is lower-cased first and then gets an upper-case first character.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          // words from the ignore list are passed through unchanged
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // hand $encoding to both steps so lower- and upper-casing agree
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6899
6900
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * Processing order:
   *   1. trim the input; if it contains no lower-case character, lower-case it completely
   *   2. main pass: keep paths / URLs / e-mails, lower-case "small words",
   *      capitalize plain words, keep words with internal capitals
   *   3. re-capitalize small words at the start of the title / a sub-sentence / an inserted sub-phrase
   *   4. re-capitalize small words at the end of the title / an inserted sub-phrase
   *   5. re-capitalize small words inside two-part hyphenated compounds
   *
   * The entries of $ignore are appended to the built-in small-word list and are
   * joined verbatim into the regular expressions, i.e. they are treated as
   * regex fragments and are not escaped.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // an input without any lower-case character is lower-cased as a whole
    // before the title-casing rules are applied (using the caller's encoding)
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $word = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $word .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $word .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $word .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $word .= $matches[5];
          }
          // Preserve trailing underscore
          $word .= $matches[6];

          return $word;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in the pattern below tests for the character
    // "…" although its inline comment talks about a hyphen; the pattern is
    // left unchanged here -- confirm the intent before touching it.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7044
7045
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the input are read as one big-endian number that is written
   * with the digits "0" and "1"; leading zeros are dropped, so e.g. "a" (0x61)
   * becomes "1100001". An empty string and a string of NUL bytes give "0".
   *
   * Every byte is converted on its own, so the result stays exact for inputs
   * of any length (a single base_convert() call on the whole hex dump loses
   * precision once the value no longer fits into a native number).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    $bits = '';
    $byteCount = \strlen($str);

    for ($i = 0; $i < $byteCount; ++$i) {
      // eight bits per byte, zero-padded on the left
      $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', STR_PAD_LEFT);
    }

    // same shape as a plain number conversion: no leading zeros, but never empty
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7058
7059
  /**
   * Split a string into an array of lines.
   *
   * A run of one or two consecutive line-break characters ("\r" and / or "\n")
   * is treated as one separator, so e.g. "a\n\nb" yields two lines.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "no usable input": an empty list, or a list holding one empty line
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ('' === $str) {
      return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    // no filter requested -> hand back the raw split result
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
7094
7095
  /**
   * Convert a string into an array of words.
   *
   * The string is split on "words" and the words themselves are kept in the
   * result, so (before any filtering) the entries alternate: even indexes hold
   * the text between the words, odd indexes hold the words.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "no usable input": an empty list, or a list holding one empty string
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // no filter requested -> hand back the raw split result
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $parts;
    }

    $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    // make sure every remaining entry is a string
    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
7139
7140
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7155
7156
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * NOTE(review): if $substring is longer than $length, the cut length handed
   * to UTF8::substr() becomes negative -- presumably that cuts from the end of
   * the string instead; confirm against UTF8::substr().
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    // an empty input, or one that already fits, is returned as it is
    if ('' === $str || $length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // reserve room for the substring that gets appended
    $keep = $length - self::strlen($substring, $encoding);

    return self::substr($str, 0, $keep, $encoding) . $substring;
  }
7189
7190
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // the string already fits -> nothing to do
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // reserve room for the substring that gets appended
    $length -= self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $length, $encoding);
    if ($head === false) {
      return '';
    }

    // Is there a space right at the cut position? If not, the last word was
    // cut in the middle. The comparison is deliberately kept loose, exactly as
    // before: a "not found" (false) compares equal to a cut position of 0.
    $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($nextSpace != $length) {
      // fall back to the last space inside the truncated part
      $lastSpace = self::strrpos($head, ' ', 0, $encoding);

      if (!($lastSpace === false && $nextSpace === false)) {
        $head = self::substr($head, 0, (int)$lastSpace, $encoding);
      }
    }

    return $head . $substring;
  }
7233
7234
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * Shortcut for UTF8::str_delimit() with "_" as the delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7248
7249
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * Implemented as UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7263
7264
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7279
7280
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split via UTF8::str_to_words(); in that result the words sit
   * on the odd indexes and the text between them on the even indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    switch ($format) {
      case 1:
        // plain list of the words
        $words = [];
        for ($idx = 1; $idx < $partCount; $idx += 2) {
          $words[] = $parts[$idx];
        }

        return $words;

      case 2:
        // words keyed by their offset; the offset starts behind the leading
        // non-word text and then advances by each word plus the text after it
        $wordsByOffset = [];
        $position = self::strlen($parts[0]);
        for ($idx = 1; $idx < $partCount; $idx += 2) {
          $wordsByOffset[$position] = $parts[$idx];
          $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
        }

        return $wordsByOffset;

      default:
        // every word is surrounded by two non-word parts
        return (int)(($partCount - 1) / 2);
    }
  }
7323
7324
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * Both strings are passed through UTF8::strtocasefold() first and the
   * results are compared with UTF8::strcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7345
7346
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7363
7364
  /**
   * Case-sensitive string comparison.
   *
   * Byte-identical strings are equal right away. Otherwise both strings are
   * brought into Unicode normalization form D and compared byte-wise, so that
   * a precomposed and a decomposed spelling of the same text compare as equal.
   * If a string cannot be normalized, the raw bytes of both strings are
   * compared instead.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() signals a failure with a non-string value; handing that to
    // \strcmp() would raise a TypeError under strict_types, so fall back to a
    // plain byte comparison of the original strings.
    if (!\is_string($normalized1) || !\is_string($normalized2)) {
      return \strcmp($str1, $str2);
    }

    return \strcmp($normalized1, $normalized2);
  }
7383
7384
  /**
   * Find length of initial segment not matching mask.
   *
   * Counts the characters at the start of $str (or of the part selected via
   * $offset / $length) that come before the first character listed in $charList.
   *
   * @param string $str
   * @param string $charList <p>The characters that end the segment.</p>
   * @param int    $offset   [optional] <p>Start of the part of $str to look at.</p>
   * @param int    $length   [optional] <p>Length of the part of $str to look at.</p>
   *
   * @return int|null
   *                  The length of the segment; the length of the whole (sub)string
   *                  if none of the characters occurs; <strong>null</strong> for an
   *                  empty $charList, an empty (sub)string or if the length cannot
   *                  be determined.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // use a dedicated variable for the regex groups instead of re-using the
    // integer parameter $length as the output array
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      $segmentLength = self::strlen($matches[1]);
    } else {
      // none of the characters occurs -> the whole string is the segment
      $segmentLength = self::strlen($str);
    }

    // keep to the documented "int|null" even if the length lookup fails
    return $segmentLength === false ? null : $segmentLength;
  }
7418
7419
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7436
7437
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted with UTF8::chr() and the results are joined in
   * the given order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
7459
7460
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The start of the string is compared against every byte sequence that is
   * registered as a key of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the byte sequences (the keys) matter here, not their lengths
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7479
7480
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // optionally run the input through UTF8::clean() before the tags are removed
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7512
7513
  /**
   * Remove all whitespace from a string.
   *
   * Every run of characters matched by the "[[:space:]]" class (pattern is
   * evaluated in UTF-8 mode) is deleted.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string without whitespace; an empty string if the
   *                replacement fails (preg_replace() yields null in that case).
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // a null result (regex error) is turned into '' by the cast
    return (string)$stripped;
  }
7530
7531
  /**
   * Case-insensitive search for the first occurrence of a string within another.
   *
   * The lookup is attempted with mbstring, then intl ("grapheme_stripos"),
   * then the native byte function (ASCII-only input) and finally with a
   * case-folded call to UTF8::strpos(). A backend that reports "not found"
   * does not end the search; the next one is tried.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found
   *                   (also for an empty haystack or needle).
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would distort the
      // position reported by "mb_strpos()" / "iconv_strpos()"
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2nd try: intl ("grapheme_stripos()" handles neither other encodings nor a negative offset)
    //

    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3rd try: plain byte function, for ascii-only input
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // last resort: case-fold both strings and search case-sensitively
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7604
7605
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive).
   *
   * Backends are tried in this order: mbstring, intl ("grapheme_stristr"),
   * the native byte function (ASCII-only input) and finally a regular expression.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The clean-up above may have emptied the needle. Compare against ''
    // explicitly: a loose "!$needle" check would also treat the valid
    // needle "0" as empty and hand back the whole haystack.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first match
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // strlen() may report false; never pass that on as an (int) offset
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
7683
7684
  /**
   * Get the string length, not the byte-length!
   *
   * Backends are tried in this order: mbstring, iconv, intl ("grapheme_strlen"),
   * the native byte function (ASCII-only input) and finally a regular expression.
   * For the encodings "CP850" and "ASCII" the byte length is returned directly.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $str) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strlen($str);
      // "grapheme_strlen()" signals a failure with null OR false:
      // only a real integer ends the search, otherwise try the next fallback
      if ($returnTmp !== null && $returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // fallback via vanilla php
    //

    \preg_match_all('/./us', $str, $parts);

    $returnTmp = \count($parts[0]);
    if ($returnTmp === 0 && isset($str[0])) {
      // nothing matched although the string is not empty: the length is unknown
      return false;
    }

    return $returnTmp;
  }
7799
7800
  /**
   * Get the length of a string in bytes.
   *
   * When the "mbstring_func_overload" support flag is set, the length is
   * taken via mb_strlen() with the 8-bit "CP850" charset instead of strlen().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int The number of bytes.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it (8-BIT) ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
7824
7825
  /**
   * Case-insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: both strings are case-folded and then handed to UTF8::strnatcmp()
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
7846
7847
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: identical strings short-circuit to 0, everything else is passed
   *       through UTF8::strtonatfold() and compared with the native strnatcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7866
7867
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are case-folded and then handed to UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
7890
7891
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are shortened via UTF8::substr() and then compared with UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // a failed substr() (false) is compared as an empty string
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
7912
7913
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also for an empty haystack or char_list).
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // locate the matched character and return the rest of the haystack from there
    $start = (int)\strpos($haystack, $hit[0]);

    return \substr($haystack, $start);
  }
7935
7936
  /**
   * Find position of first occurrence of string in a string.
   *
   * Backends are tried in this order: mbstring, intl ("grapheme_strpos"), iconv,
   * the native byte function (ASCII-only input) and finally a vanilla php search.
   * A backend that reports "not found" does not end the search; the next one is tried.
   * For the encodings "CP850" and "ASCII" the byte position is returned directly.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found it returns false (also for an empty haystack or needle).
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset counts from the end of the haystack. Translate it
    // into an absolute character offset BEFORE slicing, otherwise the
    // position found in the slice cannot be mapped back onto the original string.
    if ($offset < 0) {
      $offset = \max(0, (int)self::strlen($haystack, $encoding) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos === 0) {
      // match right at the start of the slice
      return $offset;
    }

    // $pos is a byte position inside the slice: count the characters in front of the match
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
  }
8086
8087
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * When the "mbstring_func_overload" support flag is set, the search is
   * done via mb_strpos() with the 8-bit "CP850" charset instead of strpos().
   *
   * @param string $haystack <p>The string being checked.</p>
   * @param string $needle   <p>The string to search for in haystack.</p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found (or haystack / needle
   *                   is empty), it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it (8-BIT) ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
8120
8121
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * With mbstring the work is delegated to mb_strrchr(). Without it, the
   * native strrchr() is used for "CP850" / "ASCII" (only if $before_needle is false);
   * otherwise the first character of the needle is located via iconv or UTF8::strrpos().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would distort the
      // position reported by "mb_strpos()" / "iconv_strpos()"
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // binary || ascii only: the native function is sufficient
    //

    $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($before_needle === false && $isByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // iconv / vanilla php: only the first character of the needle is searched
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPos === false) {
      return false;
    }

    if ($before_needle) {
      return self::substr($haystack, 0, $lastPos, $encoding);
    }

    return self::substr($haystack, $lastPos, null, $encoding);
  }
8234
8235
  /**
   * Reverses characters order in the string.
   *
   * The string is split into its UTF-8 characters once and the resulting
   * list is reversed. If the split is not possible (e.g. the input is not
   * valid UTF-8), the characters are collected one by one from the end.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // fast path: one linear split instead of one substr() call per character
    $count = \preg_match_all('/./us', $str, $chars);
    if ($count !== false && $count > 0) {
      return \implode('', \array_reverse($chars[0]));
    }

    // slow path: strlen() may report false, which must not end up as (int) offset
    $reversed = '';
    $i = (int)self::strlen($str);
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
8256
8257
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * With mbstring the work is delegated to mb_strrichr(). Without it, the
   * first character of the needle is located via UTF8::strripos().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would distort the
      // position reported by "mb_strpos()" / "iconv_strpos()"
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // vanilla php: only the first character of the needle is searched
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $lastPos = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
      return false;
    }

    if ($before_needle) {
      return self::substr($haystack, 0, $lastPos, $encoding);
    }

    return self::substr($haystack, $lastPos, null, $encoding);
  }
8327
8328
  /**
   * Case-insensitive search for the position of the last occurrence of a string in another string.
   *
   * The lookup is delegated to the first usable backend, in this order: byte-wise search for the
   * "CP850" / "ASCII" encodings, mbstring, intl (UTF-8 with a non-negative offset only), byte-wise
   * search for pure ASCII input, and finally case folding of both strings followed by UTF8::strrpos().
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for. A non-negative integer is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or one of the strings is empty), it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring only accept string needles, so an integer needle is converted first
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: a plain byte-wise search is sufficient
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');
    $mbstringSupport = self::$SUPPORT['mbstring'];

    if (!$isUtf8 && $mbstringSupport === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred backend: mbstring
    if ($mbstringSupport === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // next backend: intl ("grapheme_strripos()" can't handle other encodings or a negative offset)
    if ($isUtf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // pure ascii input can be searched byte-wise
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    // last resort: case-fold both strings and run the case-sensitive search
    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
  }
8433
8434
  /**
   * Byte-wise, case-insensitive search for the position of the last occurrence of a string.
   *
   * @param string $haystack <p>
   *                         The string from which to get the position of the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack
   *                         to start searching.
   *                         </p>
   *
   * @return int|false Return the numeric position of the last occurrence of needle in the
   *                   haystack string, or false if needle is not found or one of the strings is empty.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    // ... otherwise the native function already works on bytes
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
  }
8469
8470
  /**
   * Find position of last occurrence of a string in a string.
   *
   * The lookup is delegated to the first usable backend, in this order: byte-wise search for the
   * "CP850" / "ASCII" encodings, mbstring, intl (UTF-8 with a non-negative offset only), byte-wise
   * search for pure ASCII input, and finally a byte search whose result is converted to a character
   * position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is treated as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    // Normalise the "not given" offset once: the helpers below declare a non-nullable "int $offset",
    // so forwarding null would raise a TypeError instead of searching from the start.
    $offset = $offset ?? 0;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // cut the haystack down to the part that has to be searched
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      // the remaining haystack still starts at position 0, so nothing has to be added to the result
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped characters
    return $offset + self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8605
8606
  /**
   * Byte-wise search for the position of the last occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found or one of the strings is empty, it returns false.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    // ... otherwise the native function already works on bytes
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
  }
8640
8641
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start position; the string is cut via UTF8::substr() first.</p>
   * @param int|null $length [optional] <p>Length of the part to examine; the string is cut via UTF8::substr() first.</p>
   *
   * @return int
   *             The length of the matching initial segment, 0 if nothing matches or if the (cut) string or
   *             the mask is empty.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the subject string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      // no match (0) or regex error (false)
      return 0;
    }

    // the cast keeps the declared int return type even if the length cannot be determined
    return (int)self::strlen($matches[0]);
  }
8668
8669
  /**
   * Returns the part of the haystack starting at the first occurrence of needle (or the part before it).
   *
   * The lookup is delegated to the first usable backend, in this order: byte-wise search for the
   * "CP850" / "ASCII" encodings, mbstring, intl (UTF-8 only), byte-wise search for pure ASCII input,
   * and finally a regular expression.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found
   *                       or one of the strings is empty.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($needle === '' || $haystack === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: a plain byte-wise search is sufficient
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');
    $mbstringSupport = self::$SUPPORT['mbstring'];

    if (!$isUtf8 && $mbstringSupport === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred backend: mbstring
    if ($mbstringSupport === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // next backend: intl ("grapheme_strstr()" can't handle other encodings)
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($graphemeResult !== false) {
        return $graphemeResult;
      }
    }

    // pure ascii input can be searched byte-wise
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle
        ? $prefix
        : self::substr($haystack, self::strlen($prefix));
  }
8772
8773
  /**
   * Byte-wise search for the first occurrence of a string within another.
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end.
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found or one of the strings is empty.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    // ... otherwise the native function already works on bytes
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
  }
8812
8813
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through UTF8::fixStrCaseHelper() and afterwards converted with
   * UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $prepared = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict boolean true selects the lowercase variant
    return $lower === true
        ? self::strtolower($prepared, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
  }
8852
8853
  /**
   * Make a string lowercase.
   *
   * If a language is given and intl is available, the conversion is done by the matching
   * "<lang>-Lower" transliterator (or "Any-Lower" if that id is unknown); in every other case,
   * and if the transliteration fails, mb_strtolower() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the conversion functions below are not tolerant to invalid characters
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // transliteration failed (false would violate the string return type): use the generic conversion below
      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8915
8916
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (Unicode normalization form D) and all non-spacing marks (\p{Mn})
   * are removed afterwards. If a step fails, the result of the previous step is returned instead.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if (!\is_string($decomposed)) {
      // normalization failed: continue with the unmodified input
      // instead of passing a non-string value on
      $decomposed = $str;
    }

    $stripped = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    // preg_replace() returns null on error, which would violate the string return type
    return $stripped ?? $decomposed;
  }
8928
8929
  /**
   * Make a string uppercase.
   *
   * If a language is given and intl is available, the conversion is done by the matching
   * "<lang>-Upper" transliterator (or "Any-Upper" if that id is unknown); in every other case,
   * and if the transliteration fails, mb_strtoupper() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the conversion functions below are not tolerant to invalid characters
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // transliteration failed (false would violate the string return type): use the generic conversion below
      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8991
8992
  /**
   * Translate characters or replace sub-strings.
   *
   * With $from and $to given, both are split via UTF8::str_split(), cut to the same number of
   * elements and used pairwise as a translation table. Without $to, an array $from is used as the
   * translation table directly, while a string $from is removed from the string.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to translate if source and target are identical
    if ($to === $from) {
      return $str;
    }

    // no target given (INF is the "not set" marker of the signature)
    if ($to === INF) {
      if (\is_string($from)) {
        return \str_replace($from, '', $str);
      }

      return \strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    // the surplus elements of the longer list are ignored
    $pairCount = \min(\count($fromChars), \count($toChars));

    $translationTable = \array_combine(
        \array_slice($fromChars, 0, $pairCount),
        \array_slice($toChars, 0, $pairCount)
    );

    return \strtr($str, $translationTable);
  }
9036
9037
  /**
   * Return the width of a string.
   *
   * mb_strwidth() is used if mbstring is available. Otherwise the characters matched by the
   * wide-character ranges below are counted with a width of 2 and all remaining characters
   * with a width of 1.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    // vanilla php: the pattern below only works on UTF-8 input
    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    $widePattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';

    // strip the wide characters and remember how many have been removed
    $wideCount = 0;
    $narrowChars = (string)preg_replace($widePattern, '', $str, -1, $wideCount);

    // every removed character counts twice, every remaining character once
    return self::strlen($narrowChars, 'UTF-8') + 2 * $wideCount;
  }
9087
9088
  /**
9089
   * Get part of a string.
9090
   *
9091
   * @link http://php.net/manual/en/function.mb-substr.php
9092
   *
9093
   * @param string $str       <p>The string being checked.</p>
9094
   * @param int    $offset    <p>The first position used in str.</p>
9095
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9096
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9097
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9098
   *
9099
   * @return string|false
9100
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9101
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9102
   *                      characters long, <b>FALSE</b> will be returned.
9103
   */
9104 394
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9105
  {
9106 394
    if ('' === $str) {
9107 19
      return '';
9108
    }
9109
9110
    // Empty string
9111 388
    if ($length === 0) {
9112 14
      return '';
9113
    }
9114
9115 385
    if ($cleanUtf8 === true) {
9116
      // iconv and mbstring are not tolerant to invalid encoding
9117
      // further, their behaviour is inconsistent with that of PHP's substr
9118 2
      $str = self::clean($str);
9119
    }
9120
9121
    // Whole string
9122 385
    if (!$offset && $length === null) {
9123 40
      return $str;
9124
    }
9125
9126 356
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9127 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9128
    }
9129
9130 356
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9131
      self::checkForSupport();
9132
    }
9133
9134
    //
9135
    // fallback for binary || ascii only
9136
    //
9137
9138
    if (
9139 356
        $encoding === 'CP850'
9140
        ||
9141 356
        $encoding === 'ASCII'
9142
    ) {
9143 5
      return self::substr_in_byte($str, $offset, $length);
9144
    }
9145
9146
    //
9147
    // fallback via mbstring
9148
    //
9149
9150 351
    if (self::$SUPPORT['mbstring'] === true) {
9151 351
      $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9152 351
      if ($return !== false) {
9153 351
        return $return;
9154
      }
9155
    }
9156
9157
    // otherwise we need the string-length and can't fake it via "2147483647"
9158 4
    $str_length = 0;
9159 4
    if ($offset || $length === null) {
9160 4
      $str_length = self::strlen($str, $encoding);
9161
    }
9162
9163
    // e.g.: invalid chars + mbstring not installed
9164 4
    if ($str_length === false) {
9165
      return false;
9166
    }
9167
9168
    // Empty string
9169 4
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9170
      return '';
9171
    }
9172
9173
    // Impossible
9174 4
    if ($offset && $offset > $str_length) {
9175
      // "false" is the php native return type here,
9176
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9177
      return '';
9178
9179
    }
9180
9181 4
    if ($length === null) {
9182 4
      $length = (int)$str_length;
9183
    } else {
9184 2
      $length = (int)$length;
9185
    }
9186
9187
    if (
9188 4
        $encoding !== 'UTF-8'
9189
        &&
9190 4
        self::$SUPPORT['mbstring'] === false
9191
    ) {
9192 2
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9193
    }
9194
9195
    //
9196
    // fallback via intl
9197
    //
9198
9199
    if (
9200 4
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9201
        &&
9202 4
        $offset >= 0 // grapheme_substr() can't handle negative offset
9203
        &&
9204 4
        self::$SUPPORT['intl'] === true
9205
    ) {
9206
      $returnTmp = \grapheme_substr($str, $offset, $length);
9207
      if ($returnTmp !== false) {
9208
        return $returnTmp;
9209
      }
9210
    }
9211
9212
    //
9213
    // fallback via iconv
9214
    //
9215
9216
    if (
9217 4
        $length >= 0 // "iconv_substr()" can't handle negative length
9218
        &&
9219 4
        self::$SUPPORT['iconv'] === true
9220
    ) {
9221
      $returnTmp = \iconv_substr($str, $offset, $length);
9222
      if ($returnTmp !== false) {
9223
        return $returnTmp;
9224
      }
9225
    }
9226
9227
    //
9228
    // fallback for ascii only
9229
    //
9230
9231 4
    if (self::is_ascii($str)) {
9232
      return \substr($str, $offset, $length);
9233
    }
9234
9235
    //
9236
    // fallback via vanilla php
9237
    //
9238
9239
    // split to array, and remove invalid characters
9240 4
    $array = self::split($str);
9241
9242
    // extract relevant part, and join to make sting again
9243 4
    return \implode('', \array_slice($array, $offset, $length));
9244
  }
9245
9246
  /**
   * Compare two strings from a character offset, up to a number of characters.
   *
   * If $offset or $length is given, $str1 is first reduced to the requested slice
   * via UTF8::substr() and $str2 is cut to the character length of that slice.
   * The two resulting strings are then compared with UTF8::strcmp() or, if
   * $case_insensitivity is TRUE, with UTF8::strcasecmp().
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison, NULL means "up to the end of
   *                                     $str1".</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if ($offset !== 0 || $length !== null) {
      // a failed slice is treated like an empty string
      $str1Tmp = self::substr($str1, $offset, $length);
      $str1 = ($str1Tmp === false) ? '' : (string)$str1Tmp;

      // UTF8::strlen() can return false (e.g. invalid chars + mbstring not installed);
      // a bool must never reach the int|null $length parameter of UTF8::substr(),
      // so an unknown length is handled like a length of zero
      $str1Length = self::strlen($str1);
      if ($str1Length === false) {
        $str1Length = 0;
      }

      $str2Tmp = self::substr($str2, 0, (int)$str1Length);
      $str2 = ($str2Tmp === false) ? '' : (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9289
9290
  /**
   * Count the number of substring occurrences.
   *
   * If $offset or $length is given, the haystack is first reduced to that slice
   * via UTF8::substr(); the needle is then counted in the (remaining) haystack
   * with "mb_substr_count()" or, without mbstring, with a quoted "/us" regex.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. NULL means "up to the end of the haystack".
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The number of occurrences, or <strong>false</strong> if the haystack or the needle is
   *                   empty, if the haystack length can't be determined, or (PHP &lt; 7.1 only) if
   *                   offset + length is not positive.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the same encoding that is used for the slice below
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // a failed slice is treated like an empty haystack
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences from both operands before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the literal (quoted) needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9383
9384
  /**
   * Count the number of substring occurrences, working on bytes.
   *
   * With "mbstring.func_overload" active the 8-bit "CP850" variants of the "mb_"
   * functions are used, so that every byte is handled as one character; otherwise
   * the native "substr_count()" is called directly.
   *
   * @param string   $haystack <p>
   *                           The string being checked.
   *                           </p>
   * @param string   $needle   <p>
   *                           The string being found.
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. NULL means "up to the end of the haystack".
   *                           </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string; <strong>0</strong> if the haystack or the needle is empty.
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...

      if ($offset || $length !== null) {

        if ($length === null) {
          // this method works on bytes, so the default length is the byte count
          // of the haystack ("CP850" is used as 8-bit encoding, like below)
          $lengthTmp = \mb_strlen($haystack, 'CP850'); // 8-BIT
          if ($lengthTmp === false) {
            return false;
          }
          $length = (int)$lengthTmp;
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        // "(string)false" is an empty string, so a failed slice ends up as empty haystack
        $haystack = (string)self::substr_in_byte($haystack, $offset, $length);
      }

      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // never pass an explicit null as length: with strict_types the native
    // function rejects it on PHP 7, so fall back to the 3-argument form
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9458
9459
  /**
   * Count how often $substring occurs in $str.
   *
   * The search is case-sensitive unless $caseSensitive is falsy; in that case
   * both strings are case-folded via UTF8::strtocasefold() before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, <strong>0</strong> if one of the strings is empty.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($str === '' || $substring === '') {
      return 0;
    }

    // backward compatibility: a non-boolean third argument is taken as the encoding
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    $haystack = $str;
    $needle = $substring;
    if (!$caseSensitive) {
      $haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
      $needle = self::strtocasefold($needle, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($haystack, $needle, 0, null, $encoding);

    return (int)$occurrences;
  }
9489
9490
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * The haystack is returned unchanged if it does not start with the needle
   * (checked via UTF8::str_istarts_with()) or if the needle is empty.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string Return the sub-string.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() can return false; a bool must not be used as "int $offset",
    // so without a usable length the haystack is left as it is
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9518
9519
  /**
   * Get a part of a string, offset and length are counted in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string, NULL means "up to the
   *                         end".</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters; the result of the underlying "substr()" /
   *                      "mb_substr()" call is passed through as it is.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to cut from, or nothing requested
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no limit: the whole string
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $maxBytes = $length ?? 2147483647;

    // "mb_" is available if overload is used, "CP850" makes it work on single bytes (8-BIT)
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_substr($str, $offset, $maxBytes, 'CP850')
        : \substr($str, $offset, $maxBytes);
  }
9558
9559
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * The haystack is returned unchanged if it does not end with the needle
   * (checked via UTF8::str_iends_with()) or if the needle is empty.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string Return the sub-string.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() can return false; doing arithmetic with a bool would silently
    // produce a wrong length, so without usable lengths the haystack is left as it is
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9587
9588
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * The haystack is returned unchanged if it does not start with the needle
   * (checked via UTF8::str_starts_with()) or if the needle is empty.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string Return the sub-string.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() can return false; a bool must not be used as "int $offset",
    // so without a usable length the haystack is left as it is
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9616
9617
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, $replacement, $offset and $length are expanded to arrays
   * with (at most) the same number of entries and the method is applied to every
   * entry. ASCII-only input is passed to the native "substr_replace()", everything
   * else is split into characters and replaced via "array_splice()".
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given (and $str is a string), the
   *                                          replacing ends at the end of string. If length is zero then this
   *                                          function will have the effect of inserting replacement into string at
   *                                          the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // one replacement per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // one offset per input string, everything that isn't an integer becomes 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // one length per input string: a missing length becomes 0,
      // a non-integer entry becomes the number of input strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if ($value === null) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, entry by entry
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string only takes the first replacement
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // ASCII only: the native byte-wise function is sufficient
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $charCount = self::strlen($str);
      if ($charCount === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$charCount;
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
9723
9724
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * The haystack is returned unchanged if it does not end with the needle
   * (checked via UTF8::str_ends_with()) or if the needle is empty.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string Return the sub-string.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() can return false; doing arithmetic with a bool would silently
    // produce a wrong length, so without usable lengths the haystack is left as it is
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9753
  /**
9754
   * Returns a case swapped version of the string.
9755
   *
9756
   * @param string $str       <p>The input string.</p>
9757
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9758
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9759
   *
9760
   * @return string Each character's case swapped.
9761
   */
9762 6
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
9763
  {
9764 6
    if ('' === $str) {
9765 1
      return '';
9766
    }
9767
9768 6
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9769 4
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9770
    }
9771
9772 6
    if ($cleanUtf8 === true) {
9773
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
9774
      // if invalid characters are found in $haystack before $needle
9775 2
      $str = self::clean($str);
9776
    }
9777
9778 6
    return (string)(self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
9779
  }
9780
9781
  /**
   * Checks whether "mb_strlen()" or "iconv()" is provided by a polyfill.
   *
   * A function counts as polyfilled if it exists although the matching
   * PHP extension ("mbstring" / "iconv") is not loaded.
   *
   * @return bool
   *              <strong>true</strong> if at least one of the two functions exists without its extension,
   *              <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringPolyfilled || $iconvPolyfilled;
  }
9804
9805
  /**
   * Replaces every tab character with $tabLength spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces used for one tab. Default: 4</p>
   *
   * @return string The string without tab characters.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9815
9816
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase, via "mb_convert_case()" with MB_CASE_TITLE.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "UTF-8" and "CP850" are used as they are, every other name gets normalized first
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9834
9835
  /**
   * Alias of "UTF8::to_ascii()", all arguments are passed through unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9852
9853
  /**
   * Alias of "UTF8::to_iso8859()", the argument is passed through unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9868
9869
  /**
   * Alias of "UTF8::to_latin1()", the argument is passed through unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9884
9885
  /**
   * Alias of "UTF8::to_utf8()", the argument is passed through unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9900
9901
  /**
9902
   * Convert a string into ASCII.
9903
   *
9904
   * @param string $str     <p>The input string.</p>
9905
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9906
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9907
   *                        performance</p>
9908
   *
9909
   * @return string
9910
   */
9911 37
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9912
  {
9913 37
    static $UTF8_TO_ASCII;
9914
9915 37
    if ('' === $str) {
9916 3
      return '';
9917
    }
9918
9919
    // check if we only have ASCII, first (better performance)
9920 34
    if (self::is_ascii($str) === true) {
9921 6
      return $str;
9922
    }
9923
9924 29
    $str = self::clean(
9925 29
        $str,
9926 29
        true,
9927 29
        true,
9928 29
        true,
9929 29
        false,
9930 29
        true,
9931 29
        true
9932
    );
9933
9934
    // check again, if we only have ASCII, now ...
9935 29
    if (self::is_ascii($str) === true) {
9936 12
      return $str;
9937
    }
9938
9939 18
    if ($strict === true) {
9940
9941 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9942
        self::checkForSupport();
9943
      }
9944
9945 1
      if (self::$SUPPORT['intl'] === true) {
9946
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
9947
        /** @noinspection PhpComposerExtensionStubsInspection */
9948 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
9949
9950
        // check again, if we only have ASCII, now ...
9951 1
        if (self::is_ascii($str) === true) {
9952 1
          return $str;
9953
        }
9954
9955
      }
9956
    }
9957
9958 18
    if (self::$ORD === null) {
9959
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9960
    }
9961
9962 18
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
9963 18
    $chars = $ar[0];
9964 18
    $ord = null;
9965 18
    foreach ($chars as &$c) {
9966
9967 18
      $ordC0 = self::$ORD[$c[0]];
9968
9969 18
      if ($ordC0 >= 0 && $ordC0 <= 127) {
9970 14
        continue;
9971
      }
9972
9973 18
      $ordC1 = self::$ORD[$c[1]];
9974
9975
      // ASCII - next please
9976 18
      if ($ordC0 >= 192 && $ordC0 <= 223) {
9977 16
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
9978
      }
9979
9980 18
      if ($ordC0 >= 224) {
9981 7
        $ordC2 = self::$ORD[$c[2]];
9982
9983 7
        if ($ordC0 <= 239) {
9984 6
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
9985
        }
9986
9987 7
        if ($ordC0 >= 240) {
9988 2
          $ordC3 = self::$ORD[$c[3]];
9989
9990 2
          if ($ordC0 <= 247) {
9991 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
9992
          }
9993
9994 2
          if ($ordC0 >= 248) {
9995
            $ordC4 = self::$ORD[$c[4]];
9996
9997
            if ($ordC0 <= 251) {
9998
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
9999
            }
10000
10001
            if ($ordC0 >= 252) {
10002
              $ordC5 = self::$ORD[$c[5]];
10003
10004
              if ($ordC0 <= 253) {
10005
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10006
              }
10007
            }
10008
          }
10009
        }
10010
      }
10011
10012 18
      if ($ordC0 === 254 || $ordC0 === 255) {
10013
        $c = $unknown;
10014
        continue;
10015
      }
10016
10017 18
      if ($ord === null) {
10018
        $c = $unknown;
10019
        continue;
10020
      }
10021
10022 18
      $bank = $ord >> 8;
10023 18
      if (!isset($UTF8_TO_ASCII[$bank])) {
10024 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10025 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10026 2
          $UTF8_TO_ASCII[$bank] = [];
10027
        }
10028
      }
10029
10030 18
      $newchar = $ord & 255;
10031
10032 18
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10033
10034
        // keep for debugging
10035
        /*
10036
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10037
        echo "char: " . $c . "\n";
10038
        echo "ord: " . $ord . "\n";
10039
        echo "newchar: " . $newchar . "\n";
10040
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10041
        echo "bank:" . $bank . "\n\n";
10042
        */
10043
10044 17
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10045
      } else {
10046
10047
        // keep for debugging missing chars
10048
        /*
10049
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10050
        echo "char: " . $c . "\n";
10051
        echo "ord: " . $ord . "\n";
10052
        echo "newchar: " . $newchar . "\n";
10053
        echo "bank:" . $bank . "\n\n";
10054
        */
10055
10056 18
        $c = $unknown;
10057
      }
10058
    }
10059
10060 18
    return \implode('', $chars);
10061
  }
10062
10063
  /**
   * Interpret an arbitrary value as a boolean.
   *
   * The value is cast to a string first and is then evaluated in this order:
   * <ul>
   * <li>an empty string is false</li>
   * <li>"true", "1", "on", "yes" (any letter case) are true</li>
   * <li>"false", "0", "off", "no" (any letter case) are false</li>
   * <li>any other numeric string is true only if it is greater than zero</li>
   * <li>everything else is true if something truthy is left after "UTF8::trim()"</li>
   * </ul>
   *
   * @param mixed $str <p>The value to interpret, it must be castable to a string.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;

    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keyword = \strtolower($value);

    if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    // numeric strings: only strictly positive numbers count as "true"
    if (\is_numeric($value)) {
      return (float)$value > 0;
    }

    // anything else: truthy as soon as something is left after trimming
    return (bool)self::trim($value);
  }
10102
10103
  /**
   * Convert a string, or every element of an array, into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are processed recursively and keep their keys; every other value is
   * cast to a string and handed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>The string, or the array of strings, to convert.</p>
   *
   * @return string|string[] <p>The converted value, an empty input gives an empty string.</p>
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // single-array "array_map()" preserves the original keys
      return \array_map(
          static function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $text = (string)$str;

    return $text === '' ? '' : self::utf8_decode($text);
  }
10127
10128
  /**
   * Convert a string, or an array of strings, into Latin-1.
   *
   * This is only an alias, all the work is done by "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The string, or the array of strings, to convert.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
10141
10142
  /**
   * Convert a string (or every element of an array) to UTF-8 without touching bytes
   * that already look like UTF-8.
   *
   * <ul>
   * <li>Byte sequences that have the shape of a 2-, 3- or 4-byte UTF-8 character are copied as they are.</li>
   * <li>Every other byte >= 0x80 is converted on its own via "UTF8::to_utf8_convert_helper()".</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded afterwards.</li>
   * <li>Html-entities are decoded too, but only if $decodeHtmlEntityToUtf8 is true.</li>
   * <li>WARNING: Only the shape of a sequence is checked (lead byte + continuation bytes), so invalid
   * UTF-8 characters are not removed, you maybe need "UTF8::clean()" for this case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      // convert every element recursively, single-array "array_map()" keeps the keys
      return \array_map(
          static function ($item) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $input = (string)$str;
    if ($input === '') {
      return $input;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteLength = self::strlen_in_byte($input);
    $result = '';
    $pos = 0;

    while ($pos < $byteLength) {
      $lead = $input[$pos];

      // 0x00 - 0x7F: it doesn't need conversion
      if ($lead < "\x80") {
        $result .= $lead;
        ++$pos;

        continue;
      }

      // number of continuation bytes a sequence starting with this lead byte would need
      if ($lead >= "\xC0" && $lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // 0x80 - 0xBF or 0xF8 - 0xFF: doesn't look like UTF8
      }

      // collect the continuation bytes, a missing byte at the end of the string counts as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $tailLength > 0;
      for ($offset = 1; $offset <= $tailLength; ++$offset) {
        $next = $pos + $offset >= $byteLength ? "\x00" : $input[$pos + $offset];

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;

          break;
        }

        $sequence .= $next;
      }

      if ($looksLikeUtf8 === true) {
        // yeah, almost sure it's UTF8 already
        $result .= $sequence;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert only this byte and look at the next byte on its own
        $result .= self::to_utf8_convert_helper($lead);
        ++$pos;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, only on request
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
10251
10252
  /**
   * Convert one byte, that is not part of an UTF-8 sequence, into UTF-8.
   *
   * The ordinal of the byte is looked up in the "win1252_to_utf8" table first. If there
   * is no entry, a two byte sequence is built from the bits of the byte:
   * the bits above the lowest six go into the first byte (0xC0 mask), the lowest six
   * bits go into the second byte (0x80 mask).
   *
   * NOTE(review): "self::$ORD" is used as a "byte => ordinal" map and "self::$CHR" as an
   * "ordinal => byte" map, both are loaded via "self::getData()" - confirm against the data files.
   *
   * @param int|string $input <p>The byte to convert; it is used as key for "self::$ORD".</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // init
    $buf = '';

    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      $buf .= self::$WIN1252_TO_UTF8[$ordC1];
    } else {
      // ">> 6" instead of "/ 64": the division gives a float, and a float with a fractional
      // part is deprecated as array offset since PHP 8.1, the integer shift selects the same key
      $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
      // bitwise string operators (not "&&" / "||"): keep the lowest six bits, set the continuation marker
      $cc2 = ((string)$input & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    return $buf;
  }
10285
10286
  /**
   * Strip whitespace or other characters from beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without a char list (INF, or any empty value except the string "0") the characters
   * of the unicode categories "Z" (separators) and "C" (control / other) are stripped.
   * Otherwise the char list is quoted and used as a regex character class.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid char list: e.g. trim('0010', '0') must strip the zeros
    $hasCharList = $chars !== INF && ($chars === '0' || $chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCharList === false) {
      $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10315
10316
  /**
   * Uppercase the first character of a string and leave the rest as it is.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // "mb_strpos()" and "iconv_strpos()" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    $source = $cleanUtf8 === true ? self::clean($str) : $str;

    // everything after the first character, "substr()" can give false here
    $rest = self::substr($source, 1, null, $encoding);

    // the first character only
    $first = (string)self::substr($source, 0, 1, $encoding);

    return self::strtoupper($first, $encoding, $cleanUtf8) . ($rest === false ? '' : $rest);
  }
10346
10347
  /**
   * Uppercase the first character of a string.
   *
   * This is only an alias, all the work is done by "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10362
10363
  /**
   * Uppercase the first character of all words in the string.
   *
   * Pure ASCII input without $charlist and without $exceptions is handled by the native
   * "ucwords()". Otherwise the string is split via "UTF8::str_to_words()", every part that
   * is not listed in $exceptions goes through "UTF8::ucfirst()" and the parts are joined again.
   *
   * INFO: "mb_convert_case($str, MB_CASE_TITLE)" is not used, because it didn't only uppercase
   * the first letter, it also lowercase all other letters.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // strict comparison: "!$str" would also return '' for the input "0"
    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function knows neither $charlist nor $exceptions
    // (compared with '' so that a char list of "0" is not ignored)
    $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty parts only, a part like "0" must stay in the result
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10431
10432
  /**
   * Url-decode a string, decode its html-entities and fix urlencoded-win1252-chars,
   * repeated until the result is stable (if $multi_decode is true).
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $previous = $str;

      // one pass: to UTF-8 -> html-entities -> url-decoding -> simple UTF-8 fixes
      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // stop after the first pass, or as soon as a pass changed nothing
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return $str;
  }
10480
10481
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-encoded bytes '%20' - '%FF' (uppercase hex, ascending order),
   * the values are the replacement strings in UTF-8.
   *
   * NOTE(review): the empty replacements ('%7F', '%81', '%8D', '%8F', '%90', '%9D', '%A0', '%AD')
   * are kept exactly as they were. '%81', '%8D', '%8F', '%90' and '%9D' are unassigned in
   * Windows-1252, but '%A0' (no-break space) and '%AD' (soft hyphen) are assigned - please
   * confirm that the empty strings are intended.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    // '%20' - '%7E': printable ASCII, the replacement is the character itself
    $ascii = [];
    for ($code = 0x20; $code <= 0x7E; ++$code) {
      $ascii[\sprintf('%%%02X', $code)] = \chr($code);
    }

    // the keys do not overlap, so the union keeps the ascending key order
    return $ascii + [
        '%7F' => '',
        '%80' => '€', // Windows-1252 0x80 is the euro sign (was a backtick before)
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
10717
10718
  /**
10719
   * Decodes an UTF-8 string to ISO-8859-1.
10720
   *
10721
   * @param string $str <p>The input string.</p>
10722
   * @param bool   $keepUtf8Chars
10723
   *
10724
   * @return string
10725
   */
10726 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10727
  {
10728 14
    if ('' === $str) {
10729 5
      return '';
10730
    }
10731
10732 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10733 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10734
10735 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10736
10737 1
      if (self::$WIN1252_TO_UTF8 === null) {
10738
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10739
      }
10740
10741 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10741
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10742 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10742
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10743
    }
10744
10745
    /** @noinspection PhpInternalEntityUsedInspection */
10746 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10747
10748 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10749
      self::checkForSupport();
10750
    }
10751
10752
    // save for later comparision
10753 14
    $str_backup = $str;
10754 14
    $len = self::strlen_in_byte($str);
10755
10756 14
    if (self::$ORD === null) {
10757
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10758
    }
10759
10760 14
    if (self::$CHR === null) {
10761
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10762
    }
10763
10764 14
    $noCharFound = '?';
10765
    /** @noinspection ForeachInvariantsInspection */
10766 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10767 14
      switch ($str[$i] & "\xF0") {
10768 14
        case "\xC0":
10769 12
        case "\xD0":
10770 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10771 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10772 14
          break;
10773
10774
        /** @noinspection PhpMissingBreakStatementInspection */
10775 12
        case "\xF0":
10776
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10777 12
        case "\xE0":
10778 10
          $str[$j] = $noCharFound;
10779 10
          $i += 2;
10780 10
          break;
10781
10782
        default:
10783 12
          $str[$j] = $str[$i];
10784
      }
10785
    }
10786
10787 14
    $return = self::substr_in_byte($str, 0, $j);
10788 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10789
      $return = '';
10790
    }
10791
10792
    if (
10793 14
        $keepUtf8Chars === true
10794
        &&
10795 14
        self::strlen($return) >= self::strlen($str_backup)
10796
    ) {
10797 2
      return $str_backup;
10798
    }
10799
10800 14
    return $return;
10801
  }
10802
10803
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The input is first passed through PHP's native "utf8_encode()". If the
   * result contains the byte 0xC2, the sequences listed as keys of the
   * "win1252_to_utf8" data table are replaced by the table's values.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input
   *                or when the encoder reports a failure.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfilled "utf8_encode()" may return false instead of a string
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($encoded === false) {
      return '';
    }

    // without a 0xC2 lead byte there is nothing for the replacement table to match
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are derived once per request and then reused
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        // NOTE(review): static analysis reports that getData() can also return false -- confirm
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
10843
10844
  /**
   * Fixes utf8-win1252 characters by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()" for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // kept only for backward compatibility; all work happens in the replacement method
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10857
10858
  /**
   * Returns the class-level table of UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return string[]
   *                 The known whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10873
10874
  /**
   * Limit the number of words in a string.
   *
   * Leading whitespace plus at most $limit runs of non-whitespace (each with its
   * trailing whitespace) are kept. If that already covers the whole string, the
   * string is returned unchanged; otherwise the kept part is right-trimmed and
   * $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>An empty string if the input is empty or $limit is below 1.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ($str === '' || $limit < 1) {
      return '';
    }

    // possessive quantifiers: whitespace / non-whitespace runs are consumed without backtracking
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    // no match at all -> hand the input back untouched
    if (!isset($matches[0])) {
      return $str;
    }

    $head = $matches[0];

    // the matched part spans the complete string, so nothing was cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
10905
10906
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte "mask" (one byte per character),
   * the native "wordwrap()" is run on that mask, and the break positions found in
   * the mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column; an empty string
   *                if $str or $break is empty.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ($str === '' || $break === '') {
      return '';
    }

    // Build the mask: '#' for an existing break, ' ' for a space, '?' for any other character.
    // $chars holds the real characters (and break strings) in the same order.
    $mask = '';
    $chars = [];
    foreach (\explode($break, $str) as $segmentIndex => $segment) {
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go
    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;  // position in $chars
    $maskIndex = -1; // position in $mask
    $breakPos = -1;  // position of the current '#' in $mask

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {
      // copy every character in front of the current break position
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the '#' stands for an original break or a space: drop that character
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $chars);
  }
10976
10977
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * The input is split on "\r\n", "\r" or "\n"; every resulting line is wrapped
   * on its own with "UTF8::wordwrap()" and terminated with "\n".
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width passed on to "UTF8::wordwrap()".</p>
   *
   * @return string <p>The wrapped lines, each followed by "\n".</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      // the split failed, so there are no lines to wrap
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // use the class's own wordwrap(): the native function measures the width in bytes,
      // which wraps multi-byte UTF-8 text too early
      $wrapped .= self::wordwrap($line, $limit) . "\n";
    }

    return $wrapped;
  }
11001
11002
  /**
11003
   * Returns an array of Unicode White Space characters.
11004
   *
11005
   * @return string[] An array with numeric code point as key and White Space Character as value.
11006
   */
11007 2
  public static function ws(): array
11008
  {
11009 2
    return self::$WHITESPACE;
11010
  }
11011
11012
11013
}
11014