Passed
Push — master ( ab26c7...dd0550 )
by Lars
03:42
created

UTF8::str_istarts_with()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 5
nc 3
nop 2
dl 0
loc 11
ccs 6
cts 6
cp 1
crap 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  // Keys are written as explicit byte escapes so the table does not depend on the
  // encoding of this source file. The "WINDOWS-1252" variants are the BOM bytes
  // read as Windows-1252 and re-encoded as UTF-8 (e.g. 0xEF 0xBB 0xBF => "ï»¿"),
  // which is why their byte length is larger than the real BOM.
  // NOTE(review): the UTF-32 "WINDOWS-1252" variants are assumed to contain two NUL
  // bytes (matching the documented byte length of 6) — confirm against the callers.
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char may have more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
227
   * __construct()
228
   */
229 32
  public function __construct()
  {
    // Creating an instance only triggers the environment detection.
    static::checkForSupport();
  }
233
234
  /**
235
   * Return the character at the specified position: $str[1] like functionality.
236
   *
237
   * @param string $str <p>A UTF-8 string.</p>
238
   * @param int    $pos <p>The position of character to return.</p>
239
   *
240
   * @return string Single Multi-Byte character.
241
   */
242 3
  public static function access(string $str, int $pos): string
  {
    // Empty input and negative positions both produce an empty string.
    if ($pos < 0 || '' === $str) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
256
   * Prepends UTF-8 BOM character to the string and returns the whole string.
257
   *
258
   * INFO: If BOM already existed there, the Input string is returned.
259
   *
260
   * @param string $str <p>The input string.</p>
261
   *
262
   * @return string The output string that contains BOM.
263
   */
264 2
  public static function add_bom_to_string(string $str): string
  {
    // Anything other than an explicit "false" from the BOM check leaves the input untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
274
   * Adds the specified amount of left and right padding to the given string.
275
   * The default character used is a space.
276
   *
277
   * @param string $str
278
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
279
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
280
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
281
   * @param string $encoding [optional] <p>Default: UTF-8</p>
282
   *
283
   * @return string String with padding applied.
284
   */
285 25
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has a default: a required parameter after optional ones is
    // deprecated since PHP 8.0 and made the earlier defaults unusable anyway.
    $strlen = self::strlen($str, $encoding);

    // One-sided padding maps to LEFT / RIGHT; padding on both sides (or on none)
    // is delegated as STR_PAD_BOTH, exactly like before.
    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      $type = STR_PAD_BOTH;
    }

    // The target length is always the current length plus both padding amounts
    // (the unused side contributes 0).
    $length = $left + $right + $strlen;

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
307
   * Changes all keys in an array.
308
   *
309
   * @param array $array <p>The array to work on</p>
310
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
311
   *                     or <strong>CASE_LOWER</strong> (default)</p>
312
   *
313
   * @return array The input array with its keys lower- or uppercased (values are left unchanged).
314
   */
315 2
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Only CASE_UPPER selects upper-casing; every other value (including unknown
    // flags) falls back to lower-casing.
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $origKey => $item) {
      $newKey = $toUpper ? self::strtoupper($origKey) : self::strtolower($origKey);

      // Keys that collide after conversion overwrite earlier entries.
      $converted[$newKey] = $item;
    }

    return $converted;
  }
338
339
  /**
340
   * Returns the substring between $start and $end, if found, or an empty
341
   * string. An optional offset may be supplied from which to begin the
342
   * search for the start string.
343
   *
344
   * @param string $str
345
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
346
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
347
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
348
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
349
   *
350
   * @return string
351
   */
352 16
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // First index after the start delimiter; the end delimiter is searched from here.
    $from = $startPos + self::strlen($start, $encoding);
    $endPos = self::strpos($str, $end, $from, $encoding);

    // Missing end delimiter, or nothing between the two delimiters.
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $between = self::substr($str, $from, $endPos - $from, $encoding);

    return $between === false ? '' : $between;
  }
377
378
  /**
379
   * Convert binary into a string.
380
   *
381
   * @param mixed $bin 1|0
382
   *
383
   * @return string
384
   */
385 2
  public static function binary_to_str($bin): string
  {
    // Anything without a first offset (empty string, non-string scalars) yields ''.
    if (!isset($bin[0])) {
      return '';
    }

    // Keep binary digits only and drop leading zeros, so an all-zero input
    // still results in an empty string.
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      return '';
    }

    // Left-pad to whole bytes. The previous base_convert() + pack('H*') approach
    // padded odd-length hex values on the right ('1010' became "\xa0" instead of
    // "\x0a") and lost precision for inputs longer than a native number.
    $missing = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    $str = '';
    foreach (\str_split($bits, 8) as $byteBits) {
      $str .= \chr((int)\bindec($byteBits));
    }

    return $str;
  }
398
399
  /**
400
   * Returns the UTF-8 Byte Order Mark Character.
401
   *
402
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
403
   *
404
   * @return string UTF-8 Byte Order Mark
405
   */
406 4
  public static function bom(): string
  {
    // The three bytes of the UTF-8 byte order mark.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
410
411
  /**
412
   * @alias of UTF8::chr_map()
413
   *
414
   * @see   UTF8::chr_map()
415
   *
416
   * @param string|array $callback
417
   * @param string       $str
418
   *
419
   * @return string[]
420
   */
421 2
  public static function callback($callback, string $str): array
  {
    // Pure alias: chr_map() does the per-character work.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
427
   * Returns the character at $index, with indexes starting at 0.
428
   *
429
   * @param string $str
430
   * @param int    $index    <p>Position of the character.</p>
431
   * @param string $encoding [optional] <p>Default is UTF-8</p>
432
   *
433
   * @return string The character at $index.
434
   */
435 9
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // A one-character substring; the cast turns a non-string result into a string.
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
441
   * Returns an array consisting of the characters in the string.
442
   *
443
   * @param string $str <p>The input string.</p>
444
   *
445
   * @return string[] An array of chars.
446
   */
447 3
  public static function chars(string $str): array
  {
    // Split with a chunk length of one character.
    $chars = self::str_split($str, 1);

    return $chars;
  }
451
452
  /**
453
   * This method will auto-detect your server environment for UTF-8 support.
454
   *
455
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
456
   */
457 37
  public static function checkForSupport()
  {
    // The flag below is set on the first run; later calls return immediately.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // Starts as an empty list and is filled below when intl provides the function.
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype: http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo: http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json: http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // pcre (UTF-8 mode): http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
503
   * Generates a UTF-8 encoded character from the given code point.
504
   *
505
   * INFO: opposite to UTF8::ord()
506
   *
507
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
508
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
509
   *
510
   * @return string|null Multi-Byte character, returns null on failure or empty input.
511
   */
512 17
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // Per-request cache of already generated characters, keyed by code point + encoding.
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Normalize the code point to an integer up front. Numeric strings used to be
    // handed to IntlChar::chr() unchanged, which interprets a string as a UTF-8
    // character rather than as a code point, so e.g. '8364' produced null there
    // while the pure-PHP fallback returned the expected character.
    if (!\is_int($code_point)) {
      if (!\is_numeric($code_point)) {
        // empty or non-numeric input => failure, as documented
        return null;
      }

      $code_point = (int)$code_point;
    }

    // Outside of the Unicode range nothing sensible can be generated.
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    // The separator keeps the digits of the code point apart from the encoding name.
    $cacheKey = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point > 0x7F && self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
    } else {
      // "simple" chars (<= 0x7F) always use the lookup table; larger code points
      // only when IntlChar is not available. The table is loaded lazily, once.
      if (self::$CHR === null) {
        $chrTmp = self::getData('chr');
        if ($chrTmp) {
          self::$CHR = (array)$chrTmp;
        }
      }

      if ($code_point <= 0x7F) {
        // 1 byte: 0xxxxxxx
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    // encode() only accepts strings, so a failed lookup (null) is passed through as-is.
    if ($chr !== null && $encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
601
602
  /**
603
   * Applies callback to all characters of a string.
604
   *
605
   * @param string|array $callback <p>The callback function.</p>
606
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
607
   *
608
   * @return string[] The outcome of callback.
609
   */
610 2
  public static function chr_map($callback, string $str): array
  {
    // The callback receives each piece returned by split(), one at a time.
    return \array_map($callback, self::split($str));
  }
616
617
  /**
618
   * Generates an array of byte length of each character of a Unicode string.
619
   *
620
   * 1 byte => U+0000  - U+007F
621
   * 2 byte => U+0080  - U+07FF
622
   * 3 byte => U+0800  - U+FFFF
623
   * 4 byte => U+10000 - U+10FFFF
624
   *
625
   * @param string $str <p>The original unicode string.</p>
626
   *
627
   * @return int[] An array of byte lengths of each character.
628
   */
629 4
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Default byte counter is the native strlen(); with mbstring function
    // overloading active the library's own byte-length helper is used instead
    // (presumably because the overloaded strlen() no longer counts bytes).
    $byteCounter = '\strlen';
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteCounter = function ($data) {
        return UTF8::strlen_in_byte($data);
      };
    }

    return \array_map($byteCounter, $chars);
  }
652
653
  /**
654
   * Get a decimal code representation of a specific character.
655
   *
656
   * @param string $char <p>The input character.</p>
657
   *
658
   * @return int
659
   */
660 4
  public static function chr_to_decimal(string $char): int
  {
    // Reading $char[0] from an empty string is an invalid offset access,
    // so empty input is answered directly with 0.
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // The lead byte tells how many bytes the sequence has; its marker bits are
    // removed so only payload bits stay in $code.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx — a truncated sequence (missing continuation byte) is read as an
      // empty string instead of accessing an offset past the end of $char.
      $byte = $char[$i - 1] ?? '';
      $code = ($code << 6) + (self::ord($byte) & ~0x80);
    }

    return $code;
  }
691
692
  /**
693
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
694
   *
695
   * @param string|int $char <p>The input character</p>
696
   * @param string     $pfix [optional]
697
   *
698
   * @return string The code point encoded as U+xxxx
699
   */
700 2
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // The HTML entity for NUL is replaced by an empty string before the lookup.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
714
   * alias for "UTF8::chr_to_decimal()"
715
   *
716
   * @see UTF8::chr_to_decimal()
717
   *
718
   * @param string $chr
719
   *
720
   * @return int
721
   */
722 2
  public static function chr_to_int(string $chr): int
  {
    // Pure alias of chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
728
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
729
   *
730
   * @param string $body     <p>The original string to be split.</p>
731
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
732
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
733
   *
734
   * @return string The chunked string.
735
   */
736 4
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    // Cut the input into pieces of $chunklen, then glue them together with $end
    // between the pieces.
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
742
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
743
   *
744
   * @param string $str                           <p>The string to be sanitized.</p>
745
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
746
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
747
   *                                              whitespace.</p>
748
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
749
   *                                              e.g.: "…"
750
   *                                              => "..."</p>
751
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
752
   *                                              combination with
753
   *                                              $normalize_whitespace</p>
754
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
755
   *                                              mark e.g.: "�"</p>
756
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
757
   *                                              characters e.g.: "\0"</p>
758
   *
759
   * @return string Clean UTF-8 encoded string.
760
   */
761 112
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; groups 2 and 3 match single
    // stray bytes. Replacing every match with "$1" keeps the former and drops the latter.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = (string)\preg_replace($utf8Pattern, '$1', $str);

    // The optional steps run in this fixed order.
    if ($replace_diamond_question_mark) {
      $str = self::replace_diamond_question_mark($str, '');
    }

    if ($remove_invisible_characters) {
      $str = self::remove_invisible_characters($str);
    }

    if ($normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
809
810
  /**
811
   * Clean up a string so that only printable UTF-8 chars remain + fix UTF-8 encoding.
812
   *
813
   * @param string $str <p>The input string.</p>
814
   *
815
   * @return string
816
   */
817 33
  public static function cleanup($str): string
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($input);

    // Then run clean() with:
    // - BOM removal
    // - whitespace normalization (non-breaking spaces are kept)
    // - no MS Word normalization
    // - removal of the diamond question mark (�)
    // - removal of invisible characters (e.g. "\0")
    return self::clean(
        $fixed,
        true,  // $remove_bom
        true,  // $normalize_whitespace
        false, // $normalize_msword
        true,  // $keep_non_breaking_space
        true,  // $replace_diamond_question_mark
        true   // $remove_invisible_characters
    );
  }
846
847
  /**
848
   * Accepts a string or a array of strings and returns an array of Unicode code points.
849
   *
850
   * INFO: opposite to UTF8::string()
851
   *
852
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
853
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
854
   *                                    default, code points will be returned as integers.</p>
855
   *
856
   * @return array<int|string>
857
   *                           The array of code points:<br>
858
   *                           array<int> for $u_style === false<br>
859
   *                           array<string> for $u_style === true<br>
860
   */
861 12
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A string is split into its characters first; an array is used as given.
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $chars);
    if ($points === []) {
      return [];
    }

    if (!$u_style) {
      return $points;
    }

    // U+xxxx style: every integer is run through int_to_hex().
    return \array_map([self::class, 'int_to_hex'], $points);
  }
891
892
  /**
893
   * Trims the string and replaces consecutive whitespace characters with a
894
   * single space. This includes tabs and newline characters, as well as
895
   * multibyte whitespace such as the thin space and ideographic space.
896
   *
897
   * @param string $str <p>The input string.</p>
898
   *
899
   * @return string String with a trimmed $str and condensed whitespace.
900
   */
901 13
  public static function collapse_whitespace(string $str): string
  {
    // Every run of whitespace becomes one ordinary space, then the ends are trimmed.
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
907
908
  /**
909
   * Returns count of characters used in a string.
910
   *
911
   * @param string $str       <p>The input string.</p>
912
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
913
   *
914
   * @return int[] An associative array of Character as keys and
915
   *               their count as values.
916
   */
917 18
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // Split into single characters, then tally how often each one occurs.
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
921
922
  /**
923
   * Remove css media-queries.
924
   *
925
   * @param string $str
926
   *
927
   * @return string
928
   */
929 1
  public static function css_stripe_media_queries(string $str): string
  {
    // Matches an "@media ... { ... }" block (case-insensitive, ungreedy, dot matches
    // newlines) including the whitespace between its last two closing braces.
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
937
938
  /**
939
   * Checks whether ctype is available on the server.
940
   *
941
   * @return bool
942
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
943
   */
944
  public static function ctype_loaded(): bool
  {
    // Asks PHP directly whether the "ctype" extension is loaded.
    $loaded = \extension_loaded('ctype');

    return $loaded;
  }
948
949
  /**
950
   * Converts a int-value into an UTF-8 character.
951
   *
952
   * @param mixed $int
953
   *
954
   * @return string
955
   */
956 10
  public static function decimal_to_chr($int): string
  {
    // Build the numeric HTML entity ("&#<int>;") and let the entity decoder
    // turn it into the character.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
962
   * Decodes a MIME header field
963
   *
964
   * @param string $str
965
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
966
   *
967
   * @return string|false
968
   *                      A decoded MIME field on success,
969
   *                      or false if an error occurs during the decoding.
970
   */
971
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: iconv decodes straight into the requested encoding and
    // keeps going when it hits a malformed part.
    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    // Fallback without iconv. Strict comparison, consistent with the check at the
    // top of this method.
    // NOTE(review): the input is converted to $encoding *before* it is decoded;
    // converting the decoded result may be what was intended — confirm.
    if ($encoding !== 'UTF-8') {
      $str = self::encode($encoding, $str);
    }

    return \mb_decode_mimeheader($str);
  }
991
992
  /**
993
   * Encode a string with a new charset-encoding.
994
   *
995
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
996
   *        so you can also call this function on a UTF-8 string without corrupting it.
997
   *
998
   * @param string $toEncoding                  <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
999
   * @param string $str                         <p>The input string</p>
1000
   * @param bool   $autodetectFromEncoding      [optional] <p>Force the new encoding (we try to fix broken / double
1001
   *                                            encoding for UTF-8)<br> otherwise we auto-detect the current
1002
   *                                            string-encoding</p>
1003
   * @param string $fromEncoding                [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1004
   *                                            A empty string will trigger the autodetect anyway.</p>
1005
   *
1006
   * @return string
1007
   */
1008 30
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    // Nothing to convert, or no target encoding given: hand the input back untouched.
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    // 'UTF-8' and 'CP850' are used as-is; every other name goes through normalize_encoding().
    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // Source and target are known and identical: no conversion needed.
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    // Make sure self::$SUPPORT is populated before any of its flags are read below.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Pseudo-encodings: as a target they end the conversion right away; as a source the
    // input is decoded first and the real source encoding is (re-)detected afterwards.
    if ($toEncoding === 'JSON') {
      return self::json_encode($str);
    }
    if ($fromEncoding === 'JSON') {
      $str = self::json_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
      return base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
      $str = base64_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'HTML-ENTITIES') {
      return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = '';
    }

    // Detect the source encoding when requested, or when no source encoding is known.
    $fromEncodingDetected = false;
    if ($autodetectFromEncoding === true || !$fromEncoding) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    if ($fromEncodingDetected !== false) {
      // A detected encoding always wins over the one passed in by the caller.
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    // Dedicated converters for the common single-byte <-> UTF-8 cases.
    $fromSingleByte = ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1');
    if ($toEncoding === 'UTF-8' && $fromSingleByte) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8')
    ) {
      return self::to_iso8859($str);
    }

    // Warn (but keep going) when an exotic target is requested and mbstring is unavailable.
    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $strEncoded = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

      // Only false / '' count as failure; a plain truthiness test would also reject
      // the perfectly valid result "0".
      if ($strEncoded !== false && $strEncoded !== '') {
        return $strEncoded;
      }
    }

    // Last resort: iconv; if that fails as well, return the (possibly pre-decoded) input.
    $return = \iconv($fromEncoding, $toEncoding, $str);
    if ($return !== false) {
      return $return;
    }

    return $str;
  }
1141
1142
  /**
1143
   * @param string $str
1144
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
1145
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
1146
   * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
1147
   * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
1148
   * @param int    $indent           [optional] <p>Set the maximum line length of the encoded header.</p>
1149
   *
1150
   * @return string|false
1151
   *                      An encoded MIME field on success,
1152
   *                      or false if an error occurs during the encoding.
1153
   */
1154
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // 'UTF-8' and 'CP850' are used as-is; every other name goes through normalize_encoding().
    $fromIsKnown = ($fromCharset === 'UTF-8' || $fromCharset === 'CP850');
    if (!$fromIsKnown) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    $toIsKnown = ($toCharset === 'UTF-8' || $toCharset === 'CP850');
    if (!$toIsKnown) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    // Map this method's arguments onto the preference keys of iconv_mime_encode().
    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    // The field name is intentionally empty; only the value is encoded.
    return \iconv_mime_encode('', $str, $preferences);
  }
1185
1186
  /**
1187
   * Create an extract from a sentence: if the search-string is found, the function tries to center it in the output.
1188
   *
1189
   * @param string   $str                    <p>The input string.</p>
1190
   * @param string   $search                 <p>The searched string.</p>
1191
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1192
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1193
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1194
   *
1195
   * @return string
1196
   */
1197 1
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters stripped from the cut edges of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // Default extract length: half of the input length.
    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // NOTE(review): in every min()/max() below, if one of the position lookups returns false
    // (presumably "not found" -- confirm against UTF8::strpos()/strrpos()), min() yields false
    // and the (int) cast turns it into 0, so the text is then not cut at the other boundary.
    // Left unchanged here because fixing it alters the produced extracts.

    // No search term: simply cut at the first space / dot at or after ($length - 1).
    // A strict comparison is used so that a search for "0" is not mistaken for "no search".
    if ('' === $search) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    // Position of the search term and the offset at which a window of $length chars
    // centered on the term would start.
    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // Move the window start back to the last space / dot before $halfSide.
    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordPos && $halfSide > 0) {
      // The term sits far enough into the text: skip the beginning.
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      // Length of the extract, measured from $pos_start up to the next space / dot.
      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // No usable end boundary: take everything from $pos_start to the end of the string.
        // The length is measured with the caller's $encoding, like everywhere else here.
        $strSub = self::substr($str, $pos_start, self::strlen($str, $encoding), $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        // Text is skipped on both sides of the extract.
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      // Term not found, or close to the start: keep the beginning and cut the tail.
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1307
1308
  /**
1309
   * Reads entire file into a string.
1310
   *
1311
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1312
   *
1313
   * @link http://php.net/manual/en/function.file-get-contents.php
1314
   *
1315
   * @param string        $filename             <p>
1316
   *                                            Name of the file to read.
1317
   *                                            </p>
1318
   * @param bool          $use_include_path     [optional] <p>
1319
   *                                            Prior to PHP 5, this parameter is called
1320
   *                                            use_include_path and is a bool.
1321
   *                                            As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1322
   *                                            to trigger include path
1323
   *                                            search.
1324
   *                                            </p>
1325
   * @param resource|null $context              [optional] <p>
1326
   *                                            A valid context resource created with
1327
   *                                            stream_context_create. If you don't need to use a
1328
   *                                            custom context, you can skip this parameter by &null;.
1329
   *                                            </p>
1330
   * @param int|null      $offset               [optional] <p>
1331
   *                                            The offset where the reading starts.
1332
   *                                            </p>
1333
   * @param int|null      $maxLength            [optional] <p>
1334
   *                                            Maximum length of data read. The default is to read until end
1335
   *                                            of file is reached.
1336
   *                                            </p>
1337
   * @param int           $timeout              <p>The time in seconds for the timeout.</p>
1338
   *
1339
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
1340
   *                                            files, because they used non default utf-8 chars. Binary files like
1341
   *                                            images or pdf will not be converted.</p>
1342
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1343
   *                                            A empty string will trigger the autodetect anyway.</p>
1344
   *
1345
   * @return string|false The function returns the read data or false on failure.
1346
   */
1347 11
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and entity-encodes quotes, so file names
    // containing such characters are altered before the read; the filter is also deprecated
    // as of PHP 8.1 -- confirm whether sanitizing the path here is still wanted.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a context (carrying the HTTP timeout) when the caller did not supply one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // The length argument is only passed along when it really is an integer.
    $data = \is_int($maxLength) === true
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Convert everything that is not binary -- plus UTF-16 / UTF-32 payloads.
    $shouldConvert = self::is_binary($data, true) !== true
                     ||
                     self::is_utf16($data) !== false
                     ||
                     self::is_utf32($data) !== false;

    if ($shouldConvert) {
      $data = self::encode('UTF-8', $data, false, $fromEncoding);
      $data = self::cleanup($data);
    }

    return $data;
  }
1403
1404
  /**
1405
   * Checks if a file starts with BOM (Byte Order Mark) character.
1406
   *
1407
   * @param string $file_path <p>Path to a valid file.</p>
1408
   *
1409
   * @throws \RuntimeException if file_get_contents() returned false
1410
   *
1411
   * @return bool
1412
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
1413
   */
1414 2
  public static function file_has_bom(string $file_path): bool
  {
    $rawContent = \file_get_contents($file_path);

    if ($rawContent !== false) {
      // The BOM check itself is delegated to the string-based variant.
      return self::string_has_bom($rawContent);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1423
1424
  /**
1425
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1426
   *
1427
   * @param mixed  $var
1428
   * @param int    $normalization_form
1429
   * @param string $leading_combining
1430
   *
1431
   * @return mixed
1432
   */
1433 43
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Containers: filter every element / property recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string is passed through untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker, so that the isset($n[0]) test further down succeeds.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      if (isset($n[0])) {
        $var = $n;
      } else {
        // The normalizer produced nothing usable: re-encode the input to UTF-8 instead.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1487
1488
  /**
1489
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1490
   *
1491
   * Gets a specific external variable by name and optionally filters it
1492
   *
1493
   * @link  http://php.net/manual/en/function.filter-input.php
1494
   *
1495
   * @param int    $type          <p>
1496
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1497
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1498
   *                              <b>INPUT_ENV</b>.
1499
   *                              </p>
1500
   * @param string $variable_name <p>
1501
   *                              Name of a variable to get.
1502
   *                              </p>
1503
   * @param int    $filter        [optional] <p>
1504
   *                              The ID of the filter to apply. The
1505
   *                              manual page lists the available filters.
1506
   *                              </p>
1507
   * @param mixed  $options       [optional] <p>
1508
   *                              Associative array of options or bitwise disjunction of flags. If filter
1509
   *                              accepts options, flags can be provided in "flags" field of array.
1510
   *                              </p>
1511
   *
1512
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1513
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1514
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1515
   */
1516
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually passed a fourth argument.
    $argCount = \func_num_args();

    $var = $argCount >= 4
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($var);
  }
1526
1527
  /**
1528
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1529
   *
1530
   * Gets external variables and optionally filters them
1531
   *
1532
   * @link  http://php.net/manual/en/function.filter-input-array.php
1533
   *
1534
   * @param int   $type       <p>
1535
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1536
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1537
   *                          <b>INPUT_ENV</b>.
1538
   *                          </p>
1539
   * @param mixed $definition [optional] <p>
1540
   *                          An array defining the arguments. A valid key is a string
1541
   *                          containing a variable name and a valid value is either a filter type, or an array
1542
   *                          optionally specifying the filter, flags and options. If the value is an
1543
   *                          array, valid keys are filter which specifies the
1544
   *                          filter type,
1545
   *                          flags which specifies any flags that apply to the
1546
   *                          filter, and options which specifies any options that
1547
   *                          apply to the filter. See the example below for a better understanding.
1548
   *                          </p>
1549
   *                          <p>
1550
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1551
   *                          input array are filtered by this filter.
1552
   *                          </p>
1553
   * @param bool  $add_empty  [optional] <p>
1554
   *                          Add missing keys as <b>NULL</b> to the return value.
1555
   *                          </p>
1556
   *
1557
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1558
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1559
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1560
   *               set and <b>NULL</b> if the filter fails.
1561
   */
1562
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, call the native function with its own defaults.
    $argCount = \func_num_args();

    $a = $argCount >= 2
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($a);
  }
1572
1573
  /**
1574
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1575
   *
1576
   * Filters a variable with a specified filter
1577
   *
1578
   * @link  http://php.net/manual/en/function.filter-var.php
1579
   *
1580
   * @param mixed $variable <p>
1581
   *                        Value to filter.
1582
   *                        </p>
1583
   * @param int   $filter   [optional] <p>
1584
   *                        The ID of the filter to apply. The
1585
   *                        manual page lists the available filters.
1586
   *                        </p>
1587
   * @param mixed $options  [optional] <p>
1588
   *                        Associative array of options or bitwise disjunction of flags. If filter
1589
   *                        accepts options, flags can be provided in "flags" field of array. For
1590
   *                        the "callback" filter, callable type should be passed. The
1591
   *                        callback must accept one argument, the value to be filtered, and return
1592
   *                        the value after filtering/sanitizing it.
1593
   *                        </p>
1594
   *                        <p>
1595
   *                        <code>
1596
   *                        // for filters that accept options, use this format
1597
   *                        $options = array(
1598
   *                        'options' => array(
1599
   *                        'default' => 3, // value to return if the filter fails
1600
   *                        // other options here
1601
   *                        'min_range' => 0
1602
   *                        ),
1603
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1604
   *                        );
1605
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1606
   *                        // for filter that only accept flags, you can pass them directly
1607
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1608
   *                        // for filter that only accept flags, you can also pass as an array
1609
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1610
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1611
   *                        // callback validate filter
1612
   *                        function foo($value)
1613
   *                        {
1614
   *                        // Expected format: Surname, GivenNames
1615
   *                        if (strpos($value, ", ") === false) return false;
1616
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1617
   *                        $empty = (empty($surname) || empty($givennames));
1618
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1619
   *                        if ($empty || $notstrings) {
1620
   *                        return false;
1621
   *                        } else {
1622
   *                        return $value;
1623
   *                        }
1624
   *                        }
1625
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1626
   *                        </code>
1627
   *                        </p>
1628
   *
1629
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1630
   */
1631 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually passed a third argument.
    $argCount = \func_num_args();

    $filtered = $argCount >= 3
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1641
1642
  /**
1643
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1644
   *
1645
   * Gets multiple variables and optionally filters them
1646
   *
1647
   * @link  http://php.net/manual/en/function.filter-var-array.php
1648
   *
1649
   * @param array $data       <p>
1650
   *                          An array with string keys containing the data to filter.
1651
   *                          </p>
1652
   * @param mixed $definition [optional] <p>
1653
   *                          An array defining the arguments. A valid key is a string
1654
   *                          containing a variable name and a valid value is either a
1655
   *                          filter type, or an
1656
   *                          array optionally specifying the filter, flags and options.
1657
   *                          If the value is an array, valid keys are filter
1658
   *                          which specifies the filter type,
1659
   *                          flags which specifies any flags that apply to the
1660
   *                          filter, and options which specifies any options that
1661
   *                          apply to the filter. See the example below for a better understanding.
1662
   *                          </p>
1663
   *                          <p>
1664
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1665
   *                          input array are filtered by this filter.
1666
   *                          </p>
1667
   * @param bool  $add_empty  [optional] <p>
1668
   *                          Add missing keys as <b>NULL</b> to the return value.
1669
   *                          </p>
1670
   *
1671
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1672
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1673
   */
1674 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, call the native function with its own defaults.
    $argCount = \func_num_args();

    $a = $argCount >= 2
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    return self::filter($a);
  }
1684
1685
  /**
1686
   * Checks whether finfo is available on the server.
1687
   *
1688
   * @return bool
1689
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1690
   */
1691
  public static function finfo_loaded(): bool
  {
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1695
1696
  /**
1697
   * Returns the first $n characters of the string.
1698
   *
1699
   * @param string $str      <p>The input string.</p>
1700
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1701
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1702
   *
1703
   * @return string
1704
   */
1705 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Asking for zero or fewer characters always yields an empty string.
    if ($n < 1) {
      return '';
    }

    $head = self::substr($str, 0, $n, $encoding);

    // self::substr() may report failure with false; map that to an empty string.
    return $head === false ? '' : $head;
  }
1718
1719
  /**
1720
   * Check if the number of unicode characters is not more than the specified integer.
1721
   *
1722
   * @param string $str      The original string to be checked.
1723
   * @param int    $box_size The size in number of chars to be checked against string.
1724
   *
1725
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1726
   */
1727 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Character count as reported by self::strlen(), not the byte count of the raw string.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1731
1732
  /**
1733
   * @param string $str
1734
   * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
1735
   * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
1736
   *
1737
   * @return string
1738
   */
1739 54
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    $commonUpper = self::$COMMON_CASE_FOLD['upper'];
    $commonLower = self::$COMMON_CASE_FOLD['lower'];

    // Direction of the fold: a strict "true" selects lower-casing, anything else upper-casing.
    $toLower = ($useLower === true);

    // Step 1: apply the common case-folding table.
    $str = (string)\str_replace(
        $toLower ? $commonUpper : $commonLower,
        $toLower ? $commonLower : $commonUpper,
        $str
    );

    if (!$fullCaseFold) {
      return $str;
    }

    // Step 2: apply the full case-folding table; it is loaded on first use and kept in a static.
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // Index 0 is replaced by index 1 when lower-casing, and the other way round otherwise.
    $searchIndex = $toLower ? 0 : 1;
    $replaceIndex = $toLower ? 1 : 0;

    return (string)\str_replace($FULL_CASE_FOLD[$searchIndex], $FULL_CASE_FOLD[$replaceIndex], $str);
  }
1774
1775
  /**
1776
   * Try to fix simple broken UTF-8 strings.
1777
   *
1778
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1779
   *
1780
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1781
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1782
   * See: http://en.wikipedia.org/wiki/Windows-1252
1783
   *
1784
   * @param string $str <p>The input string</p>
1785
   *
1786
   * @return string
1787
   */
1788 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replace lists are derived on the first call and reused on later calls.
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // getData() returns false when the data file is missing. Store an empty
        // map in that case so the property only ever holds null or an array.
        $loadedMap = self::getData('utf8_fix');
        self::$BROKEN_UTF8_FIX = \is_array($loadedMap) ? $loadedMap : [];
      }

      // Defensive: never hand a non-array to array_keys() / array_values(),
      // even if the property was filled with something else beforehand.
      $fixMap = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixMap);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixMap);
    }

    // With an empty map this is a no-op and the input is returned unchanged.
    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1809
1810
  /**
1811
   * Fix a double (or multiple) encoded UTF8 string.
1812
   *
1813
   * @param string[]|string $str You can use a string or an array of strings.
1814
   *
1815
   * @return string[]|string
1816
   *                          Will return the fixed input-"array" or
1817
   *                          the fixed input-"string".
1818
   */
1819 2
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively); keys are preserved.
    if (\is_array($str)) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    $current = (string)$str;

    // An empty string needs no decoding pass at all.
    if ($current === '') {
      return $current;
    }

    // Keep decoding until a pass no longer changes the string,
    // i.e. until every layer of repeated encoding has been removed.
    do {
      $previous = $current;
      $current = self::to_utf8(
          self::utf8_decode($current, true)
      );
    } while ($previous !== $current);

    return $current;
  }
1840
1841
  /**
1842
   * Get the text direction of a specific character.
1843
   *
1844
   * @param string $char
1845
   *
1846
   * @return string 'RTL' or 'LTR'
1847
   */
1848 2
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }

      // any other result falls through to the code point tables below
    }

    $c = self::chr_to_decimal($char);

    // everything outside of 0x5be .. 0x10b7f is reported as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Pick the table that belongs to the part of the code space $c lives in:
    // $rtlSingles holds individual code points, $rtlRanges inclusive [from, to] pairs.
    if (0x85e >= $c) {

      $rtlSingles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828,
          0x85e,
      ];
      $rtlRanges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlSingles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $rtlRanges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between 0x85e and 0xfb1d only 0x200f is right-to-left
      return 'LTR';

    }

    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1963
1964
  /**
1965
   * Get data from "/data/*.php".
1966
   *
1967
   * @param string $file
1968
   *
1969
   * @return mixed|false Will return false on error.
1970
   */
1971 13
  private static function getData(string $file)
  {
    // $file is the bare name; directory and ".php" extension are added here.
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    // Whatever the required file returns is handed back as-is.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1981
1982
  /**
1983
   * Check for php-support.
1984
   *
1985
   * @param string|null $key
1986
   *
1987
   * @return mixed
1988
   *               Return the full support-"array", if $key === null<br>
1989
   *               return bool-value, if $key is used and available<br>
1990
   *               otherwise return <strong>null</strong>.
1991
   */
1992 26
  public static function getSupportInfo(string $key = null)
  {
    // Make sure the support map has been populated before it is read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without a key the complete support map is returned.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) keys yield null.
    return self::$SUPPORT[$key] ?? null;
  }
2008
2009
  /**
2010
   * @param string $str
2011
   *
2012
   * @return string[]
2013
   */
2014 40
  private static function get_file_type($str)
  {
    // Guesses a file type from the first two bytes of $str.
    // Returns ['ext' => ..., 'type' => ...]: both are '' when fewer than two bytes
    // are available, both are '???' when the signature is not known, and
    // 'type' is 'binary' for every known signature.

    if ('' === $str) {
      return ['ext' => '', 'type' => ''];
    }

    $signature = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($signature) !== 2) {
      return ['ext' => '', 'type' => ''];
    }

    // The raw two-byte signature is compared directly. Concatenating the two
    // decimal byte values into one number is ambiguous: e.g. the bytes 7,173
    // and 71,73 would both give "7173", so "\x07\xAD" would be taken for a gif.
    static $KNOWN_SIGNATURES = [
        "\x25\x50" => 'pdf',  //  37,  80
        "\x4d\x5a" => 'exe',  //  77,  90
        "\x4d\x54" => 'midi', //  77,  84
        "\x50\x4b" => 'zip',  //  80,  75
        "\x52\x61" => 'rar',  //  82,  97
        "\xff\xd8" => 'jpg',  // 255, 216
        "\x47\x49" => 'gif',  //  71,  73
        "\x42\x4d" => 'bmp',  //  66,  77
        "\x89\x50" => 'png',  // 137,  80
    ];

    if (isset($KNOWN_SIGNATURES[$signature])) {
      return ['ext' => $KNOWN_SIGNATURES[$signature], 'type' => 'binary'];
    }

    return ['ext' => '???', 'type' => '???'];
  }
2076
2077
  /**
2078
   * @param int    $length        <p>Length of the random string.</p>
2079
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2080
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2081
   *
2082
   * @return string
2083
   */
2084 1
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    // Nothing can be picked from an empty character pool.
    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    // Append one randomly chosen character of the pool per iteration.
    for ($picked = 0; $picked < $length; ++$picked) {
      try {
        $offset = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // fallback if random_int() cannot provide a value
        /** @noinspection RandomApiMigrationInspection */
        $offset = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $offset, 1, $encoding);
    }

    return $result;
  }
2110
2111
  /**
2112
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2113
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2114
   *
2115
   * @return string
2116
   */
2117 1
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // Seed material: a random number, the session id, the client / server
    // address (when present) and the caller supplied extra entropy.
    $seed = \mt_rand() . \session_id();
    $seed .= ($_SERVER['REMOTE_ADDR'] ?? '');
    $seed .= ($_SERVER['SERVER_ADDR'] ?? '');
    $seed .= $entropyExtra;

    $unique = \uniqid($seed, true);

    if (!$md5) {
      return $unique;
    }

    // Hash the id together with the seed to get a fixed-length result.
    return \md5($unique . $seed);
  }
2133
2134
  /**
2135
   * alias for "UTF8::string_has_bom()"
2136
   *
2137
   * @see        UTF8::string_has_bom()
2138
   *
2139
   * @param string $str
2140
   *
2141
   * @return bool
2142
   *
2143
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2144
   */
2145 2
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: the check itself lives in string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2149
2150
  /**
2151
   * Returns true if the string contains a lower case char, false otherwise.
2152
   *
2153
   * @param string $str <p>The input string.</p>
2154
   *
2155
   * @return bool Whether or not the string contains a lower case character.
2156
   */
2157 47
  public static function has_lowercase(string $str): bool
  {
    // Any prefix followed by at least one character of the POSIX class [:lower:].
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2161
2162
  /**
2163
   * Returns true if the string contains an upper case char, false otherwise.
2164
   *
2165
   * @param string $str <p>The input string.</p>
2166
   *
2167
   * @return bool Whether or not the string contains an upper case character.
2168
   */
2169 12
  public static function has_uppercase(string $str): bool
  {
    // Any prefix followed by at least one character of the POSIX class [:upper:].
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2173
2174
  /**
2175
   * Converts a hexadecimal-value into an UTF-8 character.
2176
   *
2177
   * @param string $hexdec <p>The hexadecimal value.</p>
2178
   *
2179
   * @return string|false One single UTF-8 character.
2180
   */
2181 4
  public static function hex_to_chr(string $hexdec)
  {
    // First turn the hexadecimal text into a number, then that number into a character.
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2185
2186
  /**
2187
   * Converts hexadecimal U+xxxx code point representation to integer.
2188
   *
2189
   * INFO: opposite to UTF8::int_to_hex()
2190
   *
2191
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2192
   *
2193
   * @return int|false The code point, or false on failure.
2194
   */
2195 2
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or the bare digits, with 4 to 6 hex digits.
    // Only real hexadecimal digits are allowed: with a broader letter class
    // input such as "zzzz" would be converted to 0 instead of being rejected.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2210
2211
  /**
2212
   * alias for "UTF8::html_entity_decode()"
2213
   *
2214
   * @see UTF8::html_entity_decode()
2215
   *
2216
   * @param string $str
2217
   * @param int    $flags
2218
   * @param string $encoding
2219
   *
2220
   * @return string
2221
   */
2222 4
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Plain alias: all arguments are forwarded unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2226
2227
  /**
2228
   * Converts a UTF-8 string to a series of HTML numbered entities.
2229
   *
2230
   * INFO: opposite to UTF8::html_decode()
2231
   *
2232
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2233
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2234
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2235
   *
2236
   * @return string HTML numbered entities.
2237
   */
2238 13
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // With $keepAsciiChars only code points from 0x80 upwards are converted.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap consists of groups of exactly four integers
      // (start, end, offset, mask). A stray fifth element is not part of
      // any group and is rejected with a ValueError by PHP 8.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2280
2281
  /**
2282
   * UTF-8 version of html_entity_decode()
2283
   *
2284
   * The reason we are not using html_entity_decode() by itself is because
2285
   * while it is not technically correct to leave out the semicolon
2286
   * at the end of an entity most browsers will still interpret the entity
2287
   * correctly. html_entity_decode() does not convert entities without
2288
   * semicolons, so we are left with our own little solution here. Bummer.
2289
   *
2290
   * Convert all HTML entities to their applicable characters
2291
   *
2292
   * INFO: opposite to UTF8::html_encode()
2293
   *
2294
   * @link http://php.net/manual/en/function.html-entity-decode.php
2295
   *
2296
   * @param string $str      <p>
2297
   *                         The input string.
2298
   *                         </p>
2299
   * @param int    $flags    [optional] <p>
2300
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2301
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2302
   *                         <table>
2303
   *                         Available <i>flags</i> constants
2304
   *                         <tr valign="top">
2305
   *                         <td>Constant Name</td>
2306
   *                         <td>Description</td>
2307
   *                         </tr>
2308
   *                         <tr valign="top">
2309
   *                         <td><b>ENT_COMPAT</b></td>
2310
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2311
   *                         </tr>
2312
   *                         <tr valign="top">
2313
   *                         <td><b>ENT_QUOTES</b></td>
2314
   *                         <td>Will convert both double and single quotes.</td>
2315
   *                         </tr>
2316
   *                         <tr valign="top">
2317
   *                         <td><b>ENT_NOQUOTES</b></td>
2318
   *                         <td>Will leave both double and single quotes unconverted.</td>
2319
   *                         </tr>
2320
   *                         <tr valign="top">
2321
   *                         <td><b>ENT_HTML401</b></td>
2322
   *                         <td>
2323
   *                         Handle code as HTML 4.01.
2324
   *                         </td>
2325
   *                         </tr>
2326
   *                         <tr valign="top">
2327
   *                         <td><b>ENT_XML1</b></td>
2328
   *                         <td>
2329
   *                         Handle code as XML 1.
2330
   *                         </td>
2331
   *                         </tr>
2332
   *                         <tr valign="top">
2333
   *                         <td><b>ENT_XHTML</b></td>
2334
   *                         <td>
2335
   *                         Handle code as XHTML.
2336
   *                         </td>
2337
   *                         </tr>
2338
   *                         <tr valign="top">
2339
   *                         <td><b>ENT_HTML5</b></td>
2340
   *                         <td>
2341
   *                         Handle code as HTML 5.
2342
   *                         </td>
2343
   *                         </tr>
2344
   *                         </table>
2345
   *                         </p>
2346
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2347
   *
2348
   * @return string The decoded string.
2349
   */
2350 40
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // fewer than four bytes are returned untouched (examples: &; || &x;)
    if (!isset($str[3])) {
      return $str;
    }

    // bail out early when there is no ampersand at all,
    // or when neither "&#" nor ";" occurs in the string
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // The support map must be populated *before* its "mbstring" entry is read
    // below; otherwise the first call would access an index that is not set yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Repeat until a pass changes nothing, so nested / double encoded
    // entities are resolved as well.
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        // A convmap consists of groups of exactly four integers
        // (start, end, offset, mask). A stray fifth element is not part of
        // any group and is rejected with a ValueError by PHP 8.
        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // quotes stay encoded here; they are handled by html_entity_decode() + $flags below
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities first)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2438
2439
  /**
2440
   * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2441
   *
2442
   * @param string $str
2443
   * @param string $encoding [optional] <p>Default: UTF-8</p>
2444
   *
2445
   * @return string
2446
   */
2447 6
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    // Flags: convert both quote styles, and substitute invalid code unit
    // sequences instead of failing on them.
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2455
2456
  /**
2457
   * Remove empty html-tag.
2458
   *
2459
   * e.g.: <tag></tag>
2460
   *
2461
   * @param string $str
2462
   *
2463
   * @return string
2464
   */
2465 1
  public static function html_stripe_empty_tags(string $str): string
  {
    // Matches an opening tag, optional whitespace and a closing tag, e.g. "<tag> </tag>".
    $stripped = \preg_replace(
        "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu",
        '',
        $str
    );

    // preg_replace() returns null on failure (e.g. invalid UTF-8 with the "u"
    // modifier). Hand back the untouched input then, instead of silently
    // turning the whole string into ''.
    if ($stripped === null) {
      return $str;
    }

    return $stripped;
  }
2473
2474
  /**
2475
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2476
   *
2477
   * @link http://php.net/manual/en/function.htmlentities.php
2478
   *
2479
   * @param string $str           <p>
2480
   *                              The input string.
2481
   *                              </p>
2482
   * @param int    $flags         [optional] <p>
2483
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2484
   *                              invalid code unit sequences and the used document type. The default is
2485
   *                              ENT_COMPAT | ENT_HTML401.
2486
   *                              <table>
2487
   *                              Available <i>flags</i> constants
2488
   *                              <tr valign="top">
2489
   *                              <td>Constant Name</td>
2490
   *                              <td>Description</td>
2491
   *                              </tr>
2492
   *                              <tr valign="top">
2493
   *                              <td><b>ENT_COMPAT</b></td>
2494
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2495
   *                              </tr>
2496
   *                              <tr valign="top">
2497
   *                              <td><b>ENT_QUOTES</b></td>
2498
   *                              <td>Will convert both double and single quotes.</td>
2499
   *                              </tr>
2500
   *                              <tr valign="top">
2501
   *                              <td><b>ENT_NOQUOTES</b></td>
2502
   *                              <td>Will leave both double and single quotes unconverted.</td>
2503
   *                              </tr>
2504
   *                              <tr valign="top">
2505
   *                              <td><b>ENT_IGNORE</b></td>
2506
   *                              <td>
2507
   *                              Silently discard invalid code unit sequences instead of returning
2508
   *                              an empty string. Using this flag is discouraged as it
2509
   *                              may have security implications.
2510
   *                              </td>
2511
   *                              </tr>
2512
   *                              <tr valign="top">
2513
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2514
   *                              <td>
2515
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2516
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2517
   *                              </td>
2518
   *                              </tr>
2519
   *                              <tr valign="top">
2520
   *                              <td><b>ENT_DISALLOWED</b></td>
2521
   *                              <td>
2522
   *                              Replace invalid code points for the given document type with a
2523
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2524
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2525
   *                              instance, to ensure the well-formedness of XML documents with
2526
   *                              embedded external content.
2527
   *                              </td>
2528
   *                              </tr>
2529
   *                              <tr valign="top">
2530
   *                              <td><b>ENT_HTML401</b></td>
2531
   *                              <td>
2532
   *                              Handle code as HTML 4.01.
2533
   *                              </td>
2534
   *                              </tr>
2535
   *                              <tr valign="top">
2536
   *                              <td><b>ENT_XML1</b></td>
2537
   *                              <td>
2538
   *                              Handle code as XML 1.
2539
   *                              </td>
2540
   *                              </tr>
2541
   *                              <tr valign="top">
2542
   *                              <td><b>ENT_XHTML</b></td>
2543
   *                              <td>
2544
   *                              Handle code as XHTML.
2545
   *                              </td>
2546
   *                              </tr>
2547
   *                              <tr valign="top">
2548
   *                              <td><b>ENT_HTML5</b></td>
2549
   *                              <td>
2550
   *                              Handle code as HTML 5.
2551
   *                              </td>
2552
   *                              </tr>
2553
   *                              </table>
2554
   *                              </p>
2555
   * @param string $encoding      [optional] <p>
2556
   *                              Like <b>htmlspecialchars</b>,
2557
   *                              <b>htmlentities</b> takes an optional third argument
2558
   *                              <i>encoding</i> which defines encoding used in
2559
   *                              conversion.
2560
   *                              Although this argument is technically optional, you are highly
2561
   *                              encouraged to specify the correct value for your code.
2562
   *                              </p>
2563
   * @param bool   $double_encode [optional] <p>
2564
   *                              When <i>double_encode</i> is turned off PHP will not
2565
   *                              encode existing html entities. The default is to convert everything.
2566
   *                              </p>
2567
   *
2568
   *
2569
   * @return string The encoded string.
2570
   * </p>
2571
   * <p>
2572
   * If the input <i>string</i> contains an invalid code unit
2573
   * sequence within the given <i>encoding</i> an empty string
2574
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2575
   * <b>ENT_SUBSTITUTE</b> flags are set.
2576
   */
2577 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used exactly as given; every other encoding name
    // is passed through normalize_encoding() with "UTF-8" as second argument.
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The native htmlentities() leaves backslashes untouched, so each backslash
    // is turned into its numeric entity afterwards.
    // Background: https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // Final pass through html_encode() with the same encoding.
    return self::html_encode($encoded, true, $encoding);
  }
2597
2598
  /**
2599
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2600
   *
2601
   * INFO: Take a look at "UTF8::htmlentities()"
2602
   *
2603
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2604
   *
2605
   * @param string $str           <p>
2606
   *                              The string being converted.
2607
   *                              </p>
2608
   * @param int    $flags         [optional] <p>
2609
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2610
   *                              invalid code unit sequences and the used document type. The default is
2611
   *                              ENT_COMPAT | ENT_HTML401.
2612
   *                              <table>
2613
   *                              Available <i>flags</i> constants
2614
   *                              <tr valign="top">
2615
   *                              <td>Constant Name</td>
2616
   *                              <td>Description</td>
2617
   *                              </tr>
2618
   *                              <tr valign="top">
2619
   *                              <td><b>ENT_COMPAT</b></td>
2620
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2621
   *                              </tr>
2622
   *                              <tr valign="top">
2623
   *                              <td><b>ENT_QUOTES</b></td>
2624
   *                              <td>Will convert both double and single quotes.</td>
2625
   *                              </tr>
2626
   *                              <tr valign="top">
2627
   *                              <td><b>ENT_NOQUOTES</b></td>
2628
   *                              <td>Will leave both double and single quotes unconverted.</td>
2629
   *                              </tr>
2630
   *                              <tr valign="top">
2631
   *                              <td><b>ENT_IGNORE</b></td>
2632
   *                              <td>
2633
   *                              Silently discard invalid code unit sequences instead of returning
2634
   *                              an empty string. Using this flag is discouraged as it
2635
   *                              may have security implications.
2636
   *                              </td>
2637
   *                              </tr>
2638
   *                              <tr valign="top">
2639
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2640
   *                              <td>
2641
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2642
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2643
   *                              </td>
2644
   *                              </tr>
2645
   *                              <tr valign="top">
2646
   *                              <td><b>ENT_DISALLOWED</b></td>
2647
   *                              <td>
2648
   *                              Replace invalid code points for the given document type with a
2649
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2650
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2651
   *                              instance, to ensure the well-formedness of XML documents with
2652
   *                              embedded external content.
2653
   *                              </td>
2654
   *                              </tr>
2655
   *                              <tr valign="top">
2656
   *                              <td><b>ENT_HTML401</b></td>
2657
   *                              <td>
2658
   *                              Handle code as HTML 4.01.
2659
   *                              </td>
2660
   *                              </tr>
2661
   *                              <tr valign="top">
2662
   *                              <td><b>ENT_XML1</b></td>
2663
   *                              <td>
2664
   *                              Handle code as XML 1.
2665
   *                              </td>
2666
   *                              </tr>
2667
   *                              <tr valign="top">
2668
   *                              <td><b>ENT_XHTML</b></td>
2669
   *                              <td>
2670
   *                              Handle code as XHTML.
2671
   *                              </td>
2672
   *                              </tr>
2673
   *                              <tr valign="top">
2674
   *                              <td><b>ENT_HTML5</b></td>
2675
   *                              <td>
2676
   *                              Handle code as HTML 5.
2677
   *                              </td>
2678
   *                              </tr>
2679
   *                              </table>
2680
   *                              </p>
2681
   * @param string $encoding      [optional] <p>
2682
   *                              Defines encoding used in conversion.
2683
   *                              </p>
2684
   *                              <p>
2685
   *                              For the purposes of this function, the encodings
2686
   *                              ISO-8859-1, ISO-8859-15,
2687
   *                              UTF-8, cp866,
2688
   *                              cp1251, cp1252, and
2689
   *                              KOI8-R are effectively equivalent, provided the
2690
   *                              <i>string</i> itself is valid for the encoding, as
2691
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2692
   *                              the same positions in all of these encodings.
2693
   *                              </p>
2694
   * @param bool   $double_encode [optional] <p>
2695
   *                              When <i>double_encode</i> is turned off PHP will not
2696
   *                              encode existing html entities, the default is to convert everything.
2697
   *                              </p>
2698
   *
2699
   * @return string The converted string.
2700
   * </p>
2701
   * <p>
2702
   * If the input <i>string</i> contains an invalid code unit
2703
   * sequence within the given <i>encoding</i> an empty string
2704
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2705
   * <b>ENT_SUBSTITUTE</b> flags are set.
2706
   */
2707 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used exactly as given; every other encoding name
    // is passed through normalize_encoding() with "UTF-8" as second argument.
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2715
2716
  /**
2717
   * Checks whether iconv is available on the server.
2718
   *
2719
   * @return bool
2720
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2721
   */
2722
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so it is returned as-is.
    return \extension_loaded('iconv');
  }
2726
2727
  /**
2728
   * alias for "UTF8::decimal_to_chr()"
2729
   *
2730
   * @see UTF8::decimal_to_chr()
2731
   *
2732
   * @param mixed $int
2733
   *
2734
   * @return string
2735
   */
2736 4
  public static function int_to_chr($int): string
  {
    // Pure alias: the conversion itself is done by decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2740
2741
  /**
2742
   * Converts Integer to hexadecimal U+xxxx code point representation.
2743
   *
2744
   * INFO: opposite to UTF8::hex_to_int()
2745
   *
2746
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2747
   * @param string $pfix [optional]
2748
   *
2749
   * @return string The code point, or empty string on failure.
2750
   */
2751 6
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // The dechex() output is left-padded with zeros to at least four digits;
    // longer values are kept unchanged.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2759
2760
  /**
2761
   * Checks whether intl-char is available on the server.
2762
   *
2763
   * @return bool
2764
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2765
   */
2766
  public static function intlChar_loaded(): bool
  {
    // Availability is detected by probing for the global "IntlChar" class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2770
2771
  /**
2772
   * Checks whether intl is available on the server.
2773
   *
2774
   * @return bool
2775
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2776
   */
2777 5
  public static function intl_loaded(): bool
  {
    // Availability is detected via the "intl" extension name.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2781
2782
  /**
2783
   * alias for "UTF8::is_ascii()"
2784
   *
2785
   * @see        UTF8::is_ascii()
2786
   *
2787
   * @param string $str
2788
   *
2789
   * @return bool
2790
   *
2791
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2792
   */
2793 2
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase spelling; the check itself is done by is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2797
2798
  /**
2799
   * alias for "UTF8::is_base64()"
2800
   *
2801
   * @see        UTF8::is_base64()
2802
   *
2803
   * @param string $str
2804
   *
2805
   * @return bool
2806
   *
2807
   * @deprecated <p>use "UTF8::is_base64()"</p>
2808
   */
2809 2
  public static function isBase64($str): bool
  {
    // Deprecated camelCase spelling; the check itself is done by is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2813
2814
  /**
2815
   * alias for "UTF8::is_binary()"
2816
   *
2817
   * @see        UTF8::is_binary()
2818
   *
2819
   * @param mixed $str
2820
   * @param bool  $strict
2821
   *
2822
   * @return bool
2823
   *
2824
   * @deprecated <p>use "UTF8::is_binary()"</p>
2825
   */
2826 4
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase spelling; both arguments are handed to is_binary() unchanged.
    $result = self::is_binary($str, $strict);

    return $result;
  }
2830
2831
  /**
2832
   * alias for "UTF8::is_bom()"
2833
   *
2834
   * @see        UTF8::is_bom()
2835
   *
2836
   * @param string $utf8_chr
2837
   *
2838
   * @return bool
2839
   *
2840
   * @deprecated <p>use "UTF8::is_bom()"</p>
2841
   */
2842 2
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase spelling; the check itself is done by is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2846
2847
  /**
2848
   * alias for "UTF8::is_html()"
2849
   *
2850
   * @see        UTF8::is_html()
2851
   *
2852
   * @param string $str
2853
   *
2854
   * @return bool
2855
   *
2856
   * @deprecated <p>use "UTF8::is_html()"</p>
2857
   */
2858 2
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase spelling; the check itself is done by is_html().
    $result = self::is_html($str);

    return $result;
  }
2862
2863
  /**
2864
   * alias for "UTF8::is_json()"
2865
   *
2866
   * @see        UTF8::is_json()
2867
   *
2868
   * @param string $str
2869
   *
2870
   * @return bool
2871
   *
2872
   * @deprecated <p>use "UTF8::is_json()"</p>
2873
   */
2874
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase spelling; the check itself is done by is_json().
    $result = self::is_json($str);

    return $result;
  }
2878
2879
  /**
2880
   * alias for "UTF8::is_utf16()"
2881
   *
2882
   * @see        UTF8::is_utf16()
2883
   *
2884
   * @param mixed $str
2885
   *
2886
   * @return int|false
2887
   *                    <strong>false</strong> if it's not UTF-16,<br>
2888
   *                    <strong>1</strong> for UTF-16LE,<br>
2889
   *                    <strong>2</strong> for UTF-16BE.
2890
   *
2891
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2892
   */
2893 2
  public static function isUtf16($str)
  {
    // Deprecated camelCase spelling; the result of is_utf16() is returned unchanged.
    $result = self::is_utf16($str);

    return $result;
  }
2897
2898
  /**
2899
   * alias for "UTF8::is_utf32()"
2900
   *
2901
   * @see        UTF8::is_utf32()
2902
   *
2903
   * @param mixed $str
2904
   *
2905
   * @return int|false
2906
   *                   <strong>false</strong> if it's not UTF-32,
2907
   *                   <strong>1</strong> for UTF-32LE,
2908
   *                   <strong>2</strong> for UTF-32BE.
2909
   *
2910
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2911
   */
2912 2
  public static function isUtf32($str)
  {
    // Deprecated camelCase spelling; the result of is_utf32() is returned unchanged.
    $result = self::is_utf32($str);

    return $result;
  }
2916
2917
  /**
2918
   * alias for "UTF8::is_utf8()"
2919
   *
2920
   * @see        UTF8::is_utf8()
2921
   *
2922
   * @param string $str
2923
   * @param bool   $strict
2924
   *
2925
   * @return bool
2926
   *
2927
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2928
   */
2929 17
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase spelling; both arguments are handed to is_utf8() unchanged.
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2933
2934
  /**
2935
   * Returns true if the string contains only alphabetic chars, false otherwise.
2936
   *
2937
   * @param string $str
2938
   *
2939
   * @return bool
2940
   *               Whether or not $str contains only alphabetic chars.
2941
   */
2942 10
  public static function is_alpha(string $str): bool
  {
    // POSIX [:alpha:] class, anchored at both ends; matching is delegated to str_matches_pattern().
    $alphaOnly = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnly);
  }
2946
2947
  /**
2948
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2949
   *
2950
   * @param string $str
2951
   *
2952
   * @return bool
2953
   *               Whether or not $str contains only alphanumeric chars.
2954
   */
2955 13
  public static function is_alphanumeric(string $str): bool
  {
    // POSIX [:alnum:] class, anchored at both ends; matching is delegated to str_matches_pattern().
    $alnumOnly = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnly);
  }
2959
2960
  /**
2961
   * Checks if a string is 7 bit ASCII.
2962
   *
2963
   * @param string $str <p>The string to check.</p>
2964
   *
2965
   * @return bool
2966
   *              <strong>true</strong> if it is ASCII<br>
2967
   *              <strong>false</strong> otherwise
2968
   *
2969
   */
2970 202
  public static function is_ascii(string $str): bool
  {
    // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable range 0x20-0x7E.
    // NOTE(review): 0x10 / 0x13 look like decimal 10 / 13 written as hex - confirm whether that is intended.
    // The empty string short-circuits and counts as ASCII.
    $hasForeignByte = '' !== $str && \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForeignByte;
  }
2978
2979
  /**
2980
   * Returns true if the string is base64 encoded, false otherwise.
2981
   *
2982
   * @param string $str <p>The input string.</p>
2983
   *
2984
   * @return bool Whether or not $str is base64 encoded.
2985
   */
2986 9
  public static function is_base64($str): bool
  {
    // The parameter is untyped: non-strings and the empty string are never base64.
    if (\is_string($str) === false || '' === $str) {
      return false;
    }

    // Strict mode makes base64_decode() return false as soon as $str contains
    // a character outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare against false explicitly instead of relying on truthiness: a
    // decoded payload of "0" (input "MA==") is falsy in PHP but perfectly valid.
    // The re-encode round trip rejects non-canonical input.
    return $decoded !== false && \base64_encode($decoded) === $str;
  }
3000
3001
  /**
3002
   * Check if the input is binary (heuristic; this looks like a hack).
3003
   *
3004
   * @param mixed $input
3005
   * @param bool  $strict
3006
   *
3007
   * @return bool
3008
   */
3009 40
  public static function is_binary($input, bool $strict = false): bool
  {
    $bytes = (string)$input;

    // An empty string is never binary.
    if ('' === $bytes) {
      return false;
    }

    // A string made only of the characters "0" and "1" counts as binary.
    if (\preg_match('~^[01]+$~', $bytes)) {
      return true;
    }

    // Strict mode additionally asks ext-fileinfo for the MIME encoding.
    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detector = new \finfo(FILEINFO_MIME_ENCODING);
      if ($detector->buffer($bytes) === 'binary') {
        return true;
      }
    }

    // File-type sniffing by get_file_type().
    $fileType = self::get_file_type($bytes);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($bytes);
    if (!$byteLength) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Last heuristic: NUL bytes making up more than 25.6 % of the input.
    $nullBytes = self::substr_count_in_byte($bytes, "\x0", 0, $byteLength);

    return ($nullBytes / $byteLength) > 0.256;
  }
3058
3059
  /**
3060
   * Check if the file is binary.
3061
   *
3062
   * @param string $file
3063
   *
3064
   * @return bool
3065
   */
3066 6
  public static function is_binary_file($file): bool
  {
    // Only the first 512 bytes of the file are sampled.
    $head = '';
    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
      $head = \fread($handle, 512);
      \fclose($handle);
    }

    // Nothing read (file not opened, or empty) means "not binary"; otherwise
    // the sample is classified by is_binary() in strict mode.
    return $head !== '' && self::is_binary($head, true);
  }
3083
3084
  /**
3085
   * Returns true if the string contains only whitespace chars, false otherwise.
3086
   *
3087
   * @param string $str
3088
   *
3089
   * @return bool
3090
   *               Whether or not $str contains only whitespace characters.
3091
   */
3092 15
  public static function is_blank(string $str): bool
  {
    // POSIX [:space:] class, anchored at both ends; matching is delegated to str_matches_pattern().
    $whitespaceOnly = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $whitespaceOnly);
  }
3096
3097
  /**
3098
   * Checks if the given string is equal to any "Byte Order Mark".
3099
   *
3100
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3101
   *
3102
   * @param string $str <p>The input string.</p>
3103
   *
3104
   * @return bool
3105
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
3106
   */
3107 2
  public static function is_bom($str): bool
  {
    // The known BOMs are the keys of self::$BOM; the comparison is strict, so
    // only a value identical to one of those keys matches.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3117
3118
  /**
3119
   * Determine whether the string is considered to be empty.
3120
   *
3121
   * A variable is considered empty if it does not exist or if its value equals FALSE.
3122
   * empty() does not generate a warning if the variable does not exist.
3123
   *
3124
   * @param mixed $str
3125
   *
3126
   * @return bool Whether or not $str is empty().
3127
   */
3128
  public static function is_empty($str): bool
  {
    // The parameter always exists here, so empty() reduces to a plain
    // boolean negation of the value.
    return !$str;
  }
3132
3133
  /**
3134
   * Returns true if the string contains only hexadecimal chars, false otherwise.
3135
   *
3136
   * @param string $str
3137
   *
3138
   * @return bool
3139
   *               Whether or not $str contains only hexadecimal chars.
3140
   */
3141 13
  public static function is_hexadecimal(string $str): bool
  {
    // POSIX [:xdigit:] class, anchored at both ends; matching is delegated to str_matches_pattern().
    $hexOnly = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnly);
  }
3145
3146
  /**
3147
   * Check if the string contains any html-tags <lall>.
3148
   *
3149
   * @param string $str <p>The input string.</p>
3150
   *
3151
   * @return bool
3152
   */
3153 3
  public static function is_html(string $str): bool
  {
    // An empty string cannot contain a tag.
    if ('' === $str) {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, with optional
    // attributes (unquoted, single-quoted or double-quoted values).
    $found = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // Any captured match means a tag is present.
    return \count($found) !== 0;
  }
3166
3167
  /**
3168
   * Try to check if "$str" is a JSON string.
3169
   *
3170
   * @param string $str <p>The input string.</p>
3171
   *
3172
   * @return bool
3173
   */
3174 22
  public static function is_json(string $str): bool
  {
    // An empty string is never JSON.
    if ('' === $str) {
      return false;
    }

    // Populate the support table on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only containers (object or array) count; scalar results are rejected.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3199
3200
  /**
3201
   * @param string $str
3202
   *
3203
   * @return bool
3204
   */
3205 8
  public static function is_lowercase(string $str): bool
  {
    // POSIX [:lower:] class, anchored at both ends; the outcome of
    // str_matches_pattern() is the answer.
    $lowerOnly = '^[[:lower:]]*$';

    return self::str_matches_pattern($str, $lowerOnly);
  }
3213
3214
  /**
3215
   * Returns true if the string is serialized, false otherwise.
3216
   *
3217
   * @param string $str
3218
   *
3219
   * @return bool Whether or not $str is serialized.
3220
   */
3221 7
  public static function is_serialized(string $str): bool
  {
    // An empty string is never a serialized value.
    if ('' === $str) {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // "false", so that one literal is recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // "allowed_classes => false" stops unserialize() from instantiating
    // classes named in $str (no __wakeup() / __destruct() side effects);
    // objects come back as __PHP_Incomplete_Class, which is still !== false,
    // so the result of this check is unchanged.
    // The silence operator hides the notice emitted for malformed input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3233
3234
  /**
3235
   * Returns true if the string contains only upper case chars, false
3236
   * otherwise.
3237
   *
3238
   * @param string $str <p>The input string.</p>
3239
   *
3240
   * @return bool
3241
   *               Whether or not $str contains only upper case characters.
3242
   */
3243 8
  public static function is_uppercase(string $str): bool
  {
    // POSIX [:upper:] class, anchored at both ends; matching is delegated to str_matches_pattern().
    $upperOnly = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $upperOnly);
  }
3247
3248
  /**
3249
   * Check if the string is UTF-16.
3250
   *
3251
   * @param mixed $str <p>The input string.</p>
3252
   *
3253
   * @return int|false
3254
   *                   <strong>false</strong> if it's not UTF-16,<br>
3255
   *                   <strong>1</strong> for UTF-16LE,<br>
3256
   *                   <strong>2</strong> for UTF-16BE.
3257
   */
3258 21
  public static function is_utf16($str)
  {
    // The parameter is untyped; work on its string form.
    $str = (string)$str;

    // Input that is_binary() does not flag is never reported as UTF-16.
    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before it populates the support table (its
    // "only 0 and 1" shortcut), so make sure the table is filled before reading it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // count_chars() result for the input; filled lazily and reused by the second pass.
    $strChars = [];

    // One score per byte order; little-endian is evaluated first.
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
    foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: a candidate only scores when UTF-8 -> candidate -> UTF-8
      // reproduces the first conversion exactly.
      $roundTrip = \mb_convert_encoding(
          \mb_convert_encoding($asUtf8, $encoding, 'UTF-8'),
          'UTF-8',
          $encoding
      );
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // One point per key of the converted text's count_chars() result that
      // is strictly found among the values of $strChars.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    // 1 = little-endian wins, 2 = big-endian wins.
    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
3320
3321
  /**
3322
   * Check if the string is UTF-32.
3323
   *
3324
   * @param mixed $str
3325
   *
3326
   * @return int|false
3327
   *                   <strong>false</strong> if it's not UTF-32,<br>
3328
   *                   <strong>1</strong> for UTF-32LE,<br>
3329
   *                   <strong>2</strong> for UTF-32BE.
3330
   */
3331 17
  public static function is_utf32($str)
  {
    // The parameter is untyped; work on its string form.
    $str = (string)$str;

    // Input that is_binary() does not flag is never reported as UTF-32.
    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before it populates the support table (its
    // "only 0 and 1" shortcut), so make sure the table is filled before reading it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // count_chars() result for the input; filled lazily and reused by the second pass.
    $strChars = [];

    // One score per byte order; little-endian is evaluated first.
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
    foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: a candidate only scores when UTF-8 -> candidate -> UTF-8
      // reproduces the first conversion exactly.
      $roundTrip = \mb_convert_encoding(
          \mb_convert_encoding($asUtf8, $encoding, 'UTF-8'),
          'UTF-8',
          $encoding
      );
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // One point per key of the converted text's count_chars() result that
      // is strictly found among the values of $strChars.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    // 1 = little-endian wins, 2 = big-endian wins.
    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3393
3394
  /**
3395
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3396
   *
3397
   * @see    http://hsivonen.iki.fi/php-utf8/
3398
   *
3399
   * @param string|string[] $str    <p>The string to be checked.</p>
3400
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3401
   *
3402
   * @return bool
3403
   */
3404 107
  public static function is_utf8($str, bool $strict = false): bool
3405
  {
3406 107
    if (\is_array($str) === true) {
3407 2
      foreach ($str as $k => $v) {
3408 2
        if (false === self::is_utf8($v, $strict)) {
3409 2
          return false;
3410
        }
3411
      }
3412
3413
      return true;
3414
    }
3415
3416 107
    if ('' === $str) {
3417 12
      return true;
3418
    }
3419
3420 103
    if ($strict === true) {
3421 2
      if (self::is_utf16($str) !== false) {
3422 2
        return false;
3423
      }
3424
3425
      if (self::is_utf32($str) !== false) {
3426
        return false;
3427
      }
3428
    }
3429
3430 103
    if (self::pcre_utf8_support() !== true) {
3431
3432
      // If even just the first character can be matched, when the /u
3433
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3434
      // invalid, nothing at all will match, even if the string contains
3435
      // some valid sequences
3436
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3437
    }
3438
3439 103
    $mState = 0; // cached expected number of octets after the current octet
3440
    // until the beginning of the next UTF8 character sequence
3441 103
    $mUcs4 = 0; // cached Unicode character
3442 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3443
3444 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3445
      self::checkForSupport();
3446
    }
3447
3448 103
    if (self::$ORD === null) {
3449
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3450
    }
3451
3452 103
    $len = self::strlen_in_byte((string)$str);
3453
    /** @noinspection ForeachInvariantsInspection */
3454 103
    for ($i = 0; $i < $len; $i++) {
3455 103
      $in = self::$ORD[$str[$i]];
3456 103
      if ($mState === 0) {
3457
        // When mState is zero we expect either a US-ASCII character or a
3458
        // multi-octet sequence.
3459 103
        if (0 === (0x80 & $in)) {
3460
          // US-ASCII, pass straight through.
3461 98
          $mBytes = 1;
3462 84
        } elseif (0xC0 === (0xE0 & $in)) {
3463
          // First octet of 2 octet sequence.
3464 75
          $mUcs4 = $in;
3465 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3466 75
          $mState = 1;
3467 75
          $mBytes = 2;
3468 58
        } elseif (0xE0 === (0xF0 & $in)) {
3469
          // First octet of 3 octet sequence.
3470 41
          $mUcs4 = $in;
3471 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3472 41
          $mState = 2;
3473 41
          $mBytes = 3;
3474 30
        } elseif (0xF0 === (0xF8 & $in)) {
3475
          // First octet of 4 octet sequence.
3476 19
          $mUcs4 = $in;
3477 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3478 19
          $mState = 3;
3479 19
          $mBytes = 4;
3480 13
        } elseif (0xF8 === (0xFC & $in)) {
3481
          /* First octet of 5 octet sequence.
3482
          *
3483
          * This is illegal because the encoded codepoint must be either
3484
          * (a) not the shortest form or
3485
          * (b) outside the Unicode range of 0-0x10FFFF.
3486
          * Rather than trying to resynchronize, we will carry on until the end
3487
          * of the sequence and let the later error handling code catch it.
3488
          */
3489 5
          $mUcs4 = $in;
3490 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3491 5
          $mState = 4;
3492 5
          $mBytes = 5;
3493 10
        } elseif (0xFC === (0xFE & $in)) {
3494
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3495 5
          $mUcs4 = $in;
3496 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3497 5
          $mState = 5;
3498 5
          $mBytes = 6;
3499
        } else {
3500
          // Current octet is neither in the US-ASCII range nor a legal first
3501
          // octet of a multi-octet sequence.
3502 103
          return false;
3503
        }
3504
      } else {
3505
        // When mState is non-zero, we expect a continuation of the multi-octet
3506
        // sequence
3507 84
        if (0x80 === (0xC0 & $in)) {
3508
          // Legal continuation.
3509 76
          $shift = ($mState - 1) * 6;
3510 76
          $tmp = $in;
3511 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3512 76
          $mUcs4 |= $tmp;
3513
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3514
          // Unicode code point to be output.
3515 76
          if (0 === --$mState) {
3516
            // Check for illegal sequences and code points.
3517
            //
3518
            // From Unicode 3.1, non-shortest form is illegal
3519
            if (
3520 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3521
                ||
3522 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3523
                ||
3524 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3525
                ||
3526 76
                (4 < $mBytes)
3527
                ||
3528
                // From Unicode 3.2, surrogate characters are illegal.
3529 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3530
                ||
3531
                // Code points outside the Unicode range are illegal.
3532 76
                ($mUcs4 > 0x10FFFF)
3533
            ) {
3534 8
              return false;
3535
            }
3536
            // initialize UTF8 cache
3537 76
            $mState = 0;
3538 76
            $mUcs4 = 0;
3539 76
            $mBytes = 1;
3540
          }
3541
        } else {
3542
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3543
          // Incomplete multi-octet sequence.
3544 36
          return false;
3545
        }
3546
      }
3547
    }
3548
3549 67
    return true;
3550
  }
3551
3552
  /**
   * Decodes a JSON string.
   *
   * The input is first passed through "UTF8::filter()" and then handed to PHP's native "json_decode()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The JSON string that should be decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>Recursion depth that is passed on to "json_decode()".</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options that is passed on to "json_decode()".</p>
   *
   * @throws \RuntimeException if the support table marks "json" as unavailable
   *
   * @return mixed
   *                <p>The result of the native "json_decode()" call for the filtered input.</p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // the support table is filled lazily, on first use
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3604
3605
  /**
   * Returns the JSON representation of a value.
   *
   * The value is first passed through "UTF8::filter()" and then handed to PHP's native "json_encode()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value that should be encoded.</p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON_* encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
   *                       passed on to "json_encode()".
   *                       </p>
   * @param int   $depth   [optional] <p>Maximum depth that is passed on to "json_encode()".</p>
   *
   * @throws \RuntimeException if the support table marks "json" as unavailable
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filteredValue = self::filter($value);

    // the support table is filled lazily, on first use
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3660
3661
  /**
   * Reports whether the native JSON functions can be used.
   *
   * The check looks for the global function "json_decode".
   *
   * @return bool
   *              <strong>true</strong> if "json_decode" exists, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3671
3672
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset that is passed on to "UTF8::substr()" and "UTF8::strtolower()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The lowercased first character followed by the remainder of the string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character; "UTF8::substr()" may report false here
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // the first character only
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    $loweredHead = self::strtolower($head, $encoding, $cleanUtf8);

    return $loweredHead . ($tail === false ? '' : $tail);
  }
3696
3697
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3712
3713
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each non-empty part that is not listed
   * in $exceptions is passed through "UTF8::lcfirst()", and the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept as they are (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: a loose "!$str" check would also throw away the string "0"
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;

    $result = '';
    foreach ($words as $word) {

      // skip empty parts only; a word like "0" is falsy but must stay in the output
      if ('' === (string)$word) {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $result .= $word;
    }

    return $result;
  }
3762
3763
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3778
3779
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without a char list, leading characters of the Unicode categories "Z" (separators)
   * and "C" (other / control) are removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped; INF (the default) or an empty value means "use the default set".</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is a falsy string in PHP but a perfectly valid char list (e.g. for stripping
    // leading zeros), so it must not fall into the "no chars given" branch.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = "^[\pZ\pC]+";
    } else {
      // escape the chars so they are taken literally inside the character class
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3803
3804
  /**
   * Returns the UTF-8 character with the highest code point in the given data.
   *
   * An array argument is joined into a single string before it is inspected.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, or null if no code points were found.
   */
  public static function max($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) === 0) {
      return null;
    }

    return self::chr(\max($codepoints));
  }
3826
3827
  /**
   * Returns the largest value reported by "UTF8::chr_size_list()" for the given string,
   * i.e. the maximum number of bytes taken by any of its characters.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int Max byte length of the given chars, 0 if the size list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3844
3845
  /**
   * Checks whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is loaded, the internal mbstring encoding is set to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3861
3862
  /**
   * Checks whether the mbstring "function overloading" of the string functions is active.
   *
   * INFO: the INI directive "mbstring.func_overload" is deprecated since PHP 7.2
   *
   * @return bool
   *              <strong>true</strong> if the constant "MB_OVERLOAD_STRING" exists and its bit is set in
   *              "mbstring.func_overload", <strong>false</strong> otherwise.
   */
  private static function mbstring_overloaded(): bool
  {
    // without the constant there is nothing that could be overloaded
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    // "ini_get()" hands back a string (or false); cast it so the bitwise test always works on
    // integers instead of relying on implicit conversion behind a silence operator
    $overloadFlags = (int)\ini_get('mbstring.func_overload');

    /** @noinspection PhpComposerExtensionStubsInspection */
    return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
  }
3879
3880
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * An array argument is joined into a single string before it is inspected.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, or null if no code points were found.
   */
  public static function min($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) === 0) {
      return null;
    }

    return self::chr(\min($codepoints));
  }
3902
3903
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding
   * @param mixed $fallback
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3919
3920
  /**
   * Normalize the name of an encoding.
   *
   * Resolution order: empty-ish input => $fallback, a few hard-wired names, the per-request cache,
   * the list of known encodings, and finally an alias table that is matched against the
   * upper-cased name with everything except letters, digits and whitespace removed.
   * Unknown names are returned upper-cased.
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    // '' and '0' are the falsy strings; '1' (and '0') are only a fallback for non "strict_types" usage ...
    if ($encoding === '' || $encoding === '0' || $encoding === '1') {
      return $fallback;
    }

    // hard-wired names, answered without touching the cache
    if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
      return 'UTF-8';
    }

    if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
      return 'CP850';
    }

    if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
      return 'HTML-ENTITIES';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // NOTE(review): static analysis reports that getData() can also return false - confirm and guard if needed
      self::$ENCODINGS = self::getData('encodings');
    }

    // already a known encoding name: remember it and hand it back untouched
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $upperEncoding = \strtoupper($encoding);
    $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperEncoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5',
        'ISO88596'    => 'ISO-8859-6', // Greek
        'ISO88597'    => 'ISO-8859-7',
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // known alias => canonical name, otherwise keep the upper-cased input
    $normalized = $equivalences[$aliasKey] ?? $upperEncoding;

    // the cache is keyed by the name exactly as it was passed in
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
4061
4062
  /**
   * Converts Windows ("\r\n") and old Mac ("\r") line endings into unix line endings ("\n").
   *
   * @param string $str
   *
   * @return string
   */
  public static function normalize_line_ending(string $str): string
  {
    // "strtr()" tries the longest key first, so a "\r\n" pair collapses into a single "\n"
    $lineEndings = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return \strtr($str, $lineEndings);
  }
4073
4074
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table is replaced by its value; the key / value lists
   * are built once and kept in static variables for later calls.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ('' === $str) {
      return '';
    }

    if ($SEARCH_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // NOTE(review): static analysis reports that getData() can also return false - confirm and guard if needed
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $SEARCH_CACHE = \array_keys(self::$UTF8_MSWORD);
      $REPLACE_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
  }
4102
4103
  /**
   * Normalize the whitespace.
   *
   * Every entry of the whitespace table is replaced by a plain space and, unless
   * requested otherwise, the bidirectional control chars are removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one whitespace list per "$keepNonBreakingSpace" variant (index 0 / 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ('' === $str) {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi controls are dropped completely, they are not turned into spaces
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4145
4146
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache key is built from the untouched input
    $chr_orig = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $chr_orig . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // fallback: decode the code point by hand from (at most) the first four bytes,
    // "unpack('C*')" returns a 1-based list of byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $codePoint = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $codePoint && isset($bytes[4])) {
      // 4-byte sequence
      $codePoint = (($codePoint - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $codePoint && isset($bytes[3])) {
      // 3-byte sequence
      $codePoint = (($codePoint - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $codePoint && isset($bytes[2])) {
      // 2-byte sequence
      $codePoint = (($codePoint - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    return $CHAR_CACHE[$cacheKey] = (int)$codePoint;
  }
4215
4216
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never places variables in the current scope,
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] !== true) {
      // no "mbstring": use the native parser and judge success by the parsed result only
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4252
4253
  /**
   * Checks if the "u" modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern with the "u" modifier matches the empty string (=> 1) if UTF-8 mode is usable
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return $matched === 1;
  }
4265
4266
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a decimal code point (digits only),
   * a hexadecimal code point, or otherwise a character that is converted via "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[]
   *                  The characters of the range,<br>
   *                  or an empty array if a boundary is empty or resolves to the code point 0.
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Resolve one boundary into a code point. The value is cast to a string first, because
    // handing a non-string value to a "ctype_*" function triggers its legacy (deprecated) handling.
    $toCodePoint = static function ($boundary): int {
      $boundary = (string)$boundary;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4322
4323
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // non-standard "%uXXXX" escapes are rewritten into numeric html entities ("&#xXXXX;")
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($entityDecoded));

      // stop after the first pass (single decode) or as soon as a pass changes nothing
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return $str;
  }
4371
4372
  /**
   * Filter a list of strings and re-index the remaining values.
   *
   * @param array    $strings           <p>The strings to filter.</p>
   * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int|null $removeShortValues <p>Drop values with a length less than or equal to this value,
   *                                    null disables the length check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      // the length check runs first, same as the whitespace check it is skipped if disabled
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;
      if ($isTooShort === true) {
        continue;
      }

      $isBlank = $removeEmptyValues === true
                 &&
                 \trim($candidate) === '';
      if ($isBlank === false) {
        $kept[] = $candidate;
      }
    }

    return $kept;
  }
4406
4407
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is wrapped into the delimiter and is always used with the "u" modifier.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to the modifiers "ms"
    if ($options === 'msr') {
      $options = 'ms';
    }

    // fallback for an empty delimiter
    $delimiter = $delimiter ?: '/';

    $regex = $delimiter . $pattern . $delimiter . 'u' . $options;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4437
4438
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4453
4454
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM table is checked against the beginning of the
   * (possibly already shortened) string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at byte-offset 0 counts
      if (self::strpos_in_byte($str, $bomString, 0) !== 0) {
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
      if ($stripped === false) {
        return '';
      }

      $remainingBytes -= $bomByteLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4483
4484
  /**
   * Collapses directly repeated occurrences of a string into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String(s) to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) === true ? [$what] : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($needles) === false) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $repeated = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = (string)\preg_replace($repeated, $needle, $str);
    }

    return $str;
  }
4507
4508
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
   *                              Default: '' (strip every tag)</p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $stripped = \strip_tags($str, $allowableTags);

    return $stripped;
  }
4522
4523
  /**
   * Remove all breaks [<br> | \r\n | \r | \n] from the string.
   *
   * A "\r\n" pair counts as one break, so it is replaced exactly once.
   * Only real "<br>" tags (with or without attributes / a closing slash) are
   * matched, other tags that merely start with "br" are kept.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" must be the first alternative, otherwise "\r" and "\n" would be replaced one by one;
    // "\b" ends the tag name, so that e.g. "<brand>" is not treated as a break
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
4535
4536
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * WARNING: The replacement is repeated until nothing matches anymore, so a $replacement
   *          that itself contains a matching sequence would never terminate.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form ("%00" ... "%1F") of the chars.</p>
   * @param string $replacement [optional] <p>The string that is used instead of the removed chars.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoded hex digits are case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because a removal can glue a new sequence together (e.g. "%%0B0B" => "%0B")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4569
4570
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // skip as many characters as the prefix is long and keep the rest
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4593
4594
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // "strlen()" can also return false, the cast keeps the arithmetic on integers
    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // the lengths are counted in $encoding, so the cut has to use the same encoding
    // (consistent with "remove_left()")
    return (string)self::substr($str, 0, $keepLength, $encoding);
  }
4616
4617
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4635
4636
  /**
   * Replaces all occurrences of every $search element in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string(s) to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4654
4655
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // "mb_substitute_character()" uses the keyword "none" for "drop the invalid bytes"
      $substitute = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      // (the previous substitute character is restored right after the conversion)
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);

      $str = \is_string($converted) ? $converted : '';
    }

    return str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4711
4712
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped, whitespace and other
   *                      invisible chars ("\pZ", "\pC") are stripped if omitted or empty.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // The patterns are anchored with "\z" (very end of the string): a plain "$" would also
    // match right before a trailing newline and strip chars in front of that newline.

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      // "0" is a valid character to strip, although it is a falsy string in php
      $pattern = '[\pZ\pC]+\z';
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "[$chars]+\\z";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4736
4737
  /**
   * Builds a regex fragment that matches any of the characters of $s.
   *
   * Short characters are collected in one character class "[...]" (which starts with
   * the given $class content), all other characters become alternatives of a non-capturing group.
   * The result is cached per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASS_CACHE[$cacheKey])) {
      return $RX_CLASS_CACHE[$cacheKey];
    }

    // index 0 holds the content of the character class, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the very beginning of the class
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes long
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = 1 === \count($parts)
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    return $RX_CLASS_CACHE[$cacheKey] = $return;
  }
4785
4786
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * The support table is echoed as "key - value" lines inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $report .= '</pre>';

    echo $report;
  }
4801
4802
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are passed through as they are
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4831
4832
  /**
   * Replaces each run of $tabLength spaces by one tab.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4842
4843
  /**
   * Convert a string to an array of Unicode characters.
   *
   * For an array input every value is split on its own (with the same options)
   * and the array of results is returned.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // forward all options, so that nested values are cleaned like a top-level string
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk over the bytes and detect the sequence length via the lead byte,
      // a lead byte without valid continuation bytes is skipped

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // single byte (ASCII)
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 2-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 3-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 4-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue the single characters together into chunks of $length characters
      $ret = \array_chunk($ret, $length);

      return \array_map(
          function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
4975
4976
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // upper-case the character that follows a run of separators, the separators are dropped
    $upperAfterSeparator = static function ($match) use ($encoding) {
      if (isset($match[1])) {
        return UTF8::strtoupper($match[1], $encoding);
      }

      return '';
    };

    // upper-case the complete match: the digits and the character right after them
    $upperAfterDigits = static function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    $str = self::lcfirst(self::trim($str), $encoding);
    $str = (string)\preg_replace('/^[-_]+/', '', $str);
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);
  }
5013
5014
  /**
   * Capitalizes the words of a personal name, leaving alone the words that
   * the name helper treats as special cases (e.g. "von", "de", "mac...").
   *
   * Whitespace is collapsed first; the helper is then applied twice, once
   * for space-separated parts and once for dash-separated parts.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);
    $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
5031
5032
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names on $delimiter and upper-cases the first character of every
   * part, except for parts that
   *  - are exactly one of the known lower-case name particles,
   *  - start with one of the known prefixes, or
   *  - (only when splitting on "-") start with one of the name particles.
   * The parts are then joined with the same delimiter again.
   *
   * @param string $names
   * @param string $delimiter <p>An empty delimiter yields an empty string.</p>
   * @param string $encoding
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // explode() cannot split on an empty delimiter (warning + false on PHP 7,
    // ValueError on PHP 8), so keep the historical '' result without calling it
    if ($delimiter === '') {
      return '';
    }

    $namesArray = \explode($delimiter, $names);

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // beginnings that protect a part from being capitalized; when splitting on
    // dashes the name particles count as beginnings too
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
    }

    foreach ($namesArray as $index => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      foreach ($protectedBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          // one hit is enough, leave this part untouched
          continue 2;
        }
      }

      $namesArray[$index] = self::str_upper_first($name);
    }

    return \implode($delimiter, $namesArray);
  }
5120
5121
  /**
   * Tells whether $needle occurs in $haystack. The search is case-sensitive
   * unless $caseSensitive is false. An empty haystack or an empty needle
   * always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding; its truthiness
    // still selects the case-sensitive or case-insensitive search below)
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5150
5151
  /**
   * Tells whether $haystack contains every entry of $needles. The search is
   * case-sensitive unless $caseSensitive is false. An empty haystack or an
   * empty needle list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        // the first missing needle settles the answer
        return false;
      }
    }

    return true;
  }
5186
5187
  /**
   * Tells whether $haystack contains at least one entry of $needles. The
   * search is case-sensitive unless $caseSensitive is false. An empty needle
   * list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (empty($needles)) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        // the first hit settles the answer
        return true;
      }
    }

    return false;
  }
5214
5215
  /**
   * Returns the dash-separated form of the string: a shortcut for
   * str_delimit() with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5229
5230
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * Uppercase detection uses the Unicode "Lu" category, so non-ASCII capitals
   * (e.g. "É", "Σ") are delimited the same way as "A"-"Z".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // put a dash marker in front of every capital that is not at a word
    // boundary; \p{Lu} (instead of [A-Z]) also matches non-ASCII capitals
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    // lower-case before inserting the delimiter, so the delimiter keeps its case
    $str = self::strtolower($str, $encoding);

    // every run of markers, underscores and whitespace becomes one delimiter
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
  }
5252
5253
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order and the first hit wins:
   *  1. binary input: UTF-16 (LE/BE), then UTF-32 (LE/BE), otherwise false
   *  2. ASCII
   *  3. UTF-8
   *  4. "mb_detect_encoding()" with a fixed candidate list (if mbstring is available)
   *  5. an "iconv()" round-trip over the encodings from the "encodings" data set
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // evaluate each check once and reuse the result for the LE / BE decision
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() may hand back false; store an empty list instead so the
      // property stays an array and the loop below never iterates a boolean
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    if (\is_array(self::$ENCODINGS)) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE but still throw notice
        // an encoding is accepted when converting the string onto itself leaves it unchanged
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
5379
5380
  /**
   * Check if the string ends with the given substring (case-sensitive,
   * byte-wise comparison). An empty haystack or needle yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    $needleLength = \strlen($needle);

    if ($needleLength === 0 || $haystack === '') {
      return false;
    }

    // compare the last bytes of the haystack (as many as the needle has) with the needle
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
5396
5397
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5421
5422
  /**
   * Makes sure the string begins with $substring: when str_starts_with()
   * reports that it does not, $substring is prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
5439
5440
  /**
   * Makes sure the string ends with $substring: when str_ends_with() reports
   * that it does not, $substring is appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
5456
5457
  /**
   * Makes an identifier-like string readable: strips '_id', turns the
   * remaining underscores into spaces, trims the result and passes it
   * through ucfirst().
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // '_id' is listed first so it is removed before the plain '_' replacement runs
    $search = ['_id', '_'];
    $replacement = ['', ' '];

    $readable = self::str_replace($search, $replacement, $str);

    return self::ucfirst(self::trim($readable));
  }
5481
5482
  /**
   * Check if the string ends with the given substring, case insensitive.
   * An empty haystack or needle yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the tail is cut by the byte length of the needle, then compared ignoring case
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5502
5503
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5527
5528
  /**
   * Finds the first occurrence of $needle in the string, ignoring case.
   * Thin wrapper that forwards all arguments to stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5550
5551
  /**
   * Finds the last occurrence of $needle in the string, ignoring case.
   * Thin wrapper that forwards all arguments to strripos(); offsets may be
   * negative to count from the last character in the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5574
5575
  /**
   * Finds the first occurrence of $needle in the string (case-sensitive).
   * Thin wrapper that forwards all arguments to strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5597
5598
  /**
   * Finds the last occurrence of $needle in the string (case-sensitive).
   * Thin wrapper that forwards all arguments to strrpos(); offsets may be
   * negative to count from the last character in the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5621
5622
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged when $index lies beyond its length or
   * when the length cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // static analysis reports that strlen() can yield false; substr() below
    // expects an integer length, so never pass a boolean on
    if ($len === false || $index > $len) {
      return $str;
    }

    // substr() may report false as well; the casts turn that into ''
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5645
5646
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Each search term is turned into a quoted, case-insensitive UTF-8 pattern
   * and applied via preg_replace(). Replacement text is taken literally:
   * "$" and "\" are escaped, so values such as "$10" or "\1" are inserted
   * as-is instead of being read as backreferences. An empty search term
   * never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string, or an array of replacements
   *                       paired with the search terms by position.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // neutralize preg_replace() backreference syntax in the replacement text
    $toLiteral = static function ($replacement): string {
      return \addcslashes((string)$replacement, '\\$');
    };
    $replacements = \is_array($replace) ? \array_map($toLiteral, $replace) : $toLiteral($replace);

    $replaced = 0;
    $subject = \preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
5689
5690
  /**
   * Check if the string starts with the given substring, case insensitive.
   * An empty haystack or needle yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at position 0 means the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
5710
5711
  /**
   * Tells whether the string begins with at least one of $substrings.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5739
5740
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // look the separator up with the same encoding that is used for the
    // length and the cut below, so all three agree on character positions
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5771
5772
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // look the separator up with the same encoding that is used for the
    // length and the cut below, so all three agree on character positions
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5803
5804
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // look the separator up with the same encoding that is used for the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5830
5831
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string when the input or the separator is empty, or
   * when the separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // look the separator up with the same encoding that is used for the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5857
5858
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to stristr(); an empty input, an empty needle or a failed
   * lookup all yield an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5890
5891
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * <p>Thin wrapper around strrichr() that returns an empty string instead of false.</p>
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Forwarded to strrichr(). Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The result of strrichr(), or an empty string if an argument is empty or nothing was found.</p>
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    // strrichr() signals "not found" with false; callers of this method always get a string.
    return $found === false ? '' : $found;
  }
5918
5919
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end; values &lt;= 0 yield an empty string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The trailing characters, or an empty string when substr() reports false.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // A negative start counts from the end; a null length means "up to the end".
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5938
5939
  /**
   * Limit the number of characters in a string.
   *
   * <p>Strings that already fit into $length are returned untouched. Longer strings are
   * cut so that the kept part plus $strAddOn is $length characters long. If $strAddOn
   * alone is longer than $length, only $strAddOn is returned.</p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Maximum length; values &lt;= 0 yield an empty string. Default: 100</p>
   * @param string $strAddOn [optional] <p>Marker appended to a shortened string. Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Reserve room for the add-on, measured in the same encoding as the input.
    $keep = $length - (int)self::strlen($strAddOn, $encoding);

    // A negative length would make substr() drop characters from the END of the
    // string instead of limiting it, so clamp it to "keep nothing".
    if ($keep < 0) {
      $keep = 0;
    }

    // substr() may report false; treat that as an empty kept part.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5965
5966
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * <p>If the character at the limit is a space, the string is cut right before it.
   * Otherwise the string is cut at $length and the trailing (possibly partial) word is
   * dropped; when that leaves nothing, the first $length - 1 characters are kept instead.
   * $strAddOn is appended to every shortened result.</p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The limit falls exactly on a space: cut right before it.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length, $encoding);

    // Drop the last space-separated chunk, which may be an incomplete word.
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // There was only a single chunk: fall back to a hard cut.
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
6007
6008
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared leading characters; empty if the first characters already differ.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the length of the shorter string has to be inspected.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $pos = 0;
    while ($pos < $limit) {
      $char = self::substr($str, $pos, 1, $encoding);

      // Stop at the first position where both strings disagree.
      if ($char != self::substr($otherStr, $pos, 1, $encoding)) {
        break;
      }

      $prefix .= $char;
      ++$pos;
    }

    return $prefix;
  }
6034
6035
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * <p>Uses the dynamic-programming table described at
   * http://en.wikipedia.org/wiki/Longest_common_substring_problem</p>
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, taken from $str; empty if there is none.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength == 0 || $otherLength == 0) {
      return '';
    }

    // Extract every character of $otherStr once, instead of once per table cell.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // index in $str just behind that match
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // Invariant for the whole inner loop, so it is fetched only once per row.
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar == $otherChars[$j]) {
          // Extend the match that ended on the previous character pair.
          $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

          // Strictly greater: on ties the earliest occurrence wins.
          if ($table[$i][$j] > $len) {
            $len = $table[$i][$j];
            $end = $i;
          }
        } else {
          $table[$i][$j] = 0;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6086
6087
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared trailing characters; empty if the last characters already differ.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the length of the shorter string has to be inspected.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $fromEnd = 1;
    while ($fromEnd <= $limit) {
      // Negative offsets address characters counted from the end.
      $char = self::substr($str, -$fromEnd, 1, $encoding);

      if ($char != self::substr($otherStr, -$fromEnd, 1, $encoding)) {
        break;
      }

      // Walking backwards, so every match is prepended.
      $suffix = $char . $suffix;
      ++$fromEnd;
    }

    return $suffix;
  }
6113
6114
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * <p>The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against, without delimiters.</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match and 0 or false otherwise.
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
6130
6131
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // Negative offsets reach back at most $length characters;
    // non-negative offsets must be strictly below $length.
    return $offset < 0
        ? $length >= \abs($offset)
        : $length > $offset;
  }
6154
6155
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Use the same bounds check as str_offset_exists(), so the length is measured
    // in $encoding, the encoding char_at() reads the character with.
    if (!self::str_offset_exists($str, $index, $encoding)) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6184
6185
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * <p>The input is returned unchanged when it is already at least $pad_length
   * characters long, when $pad_length is not positive, or when $pad_string is empty.
   * An empty input string is returned as an empty string (it is never padded).</p>
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string.
   *
   * @throws \InvalidArgumentException If $pad_type is neither an integer nor one of the accepted strings.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Accept the textual aliases in addition to the native STR_PAD_* integers.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string, $encoding);

      // An empty pad string cannot fill anything, and dividing by its length
      // below would be a division by zero.
      if (!$ps_length) {
        return $str;
      }

      $diff = ($pad_length - $str_length);

      switch ($pad_type) {
        case STR_PAD_LEFT:
          // Repeat often enough to cover $diff, then cut to the exact size.
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff, $encoding);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // The left side gets the smaller half, the right side the larger half.
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff, $encoding);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
6256
6257
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. Alias for pad() with a $padType of 'both'.
   *
   * <p>With an odd amount of padding, the left side receives the smaller half.</p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6274
6275
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Alias for pad() with a $padType of 'left'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the requested encoding, as str_pad_both() does,
    // so the amount of padding matches what apply_padding() works with.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6290
6291
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Alias for pad() with a $padType of 'right'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the requested encoding, as str_pad_both() does,
    // so the amount of padding matches what apply_padding() works with.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6306
6307
  /**
   * Repeat a string.
   *
   * <p>The input is passed through filter() before it is repeated.</p>
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6331
6332
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // $count is handed through by reference, so the native function fills it in.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6365
6366
  /**
   * Replaces $search with $replacement where it occurs at the beginning of the string.
   *
   * <p>$search is escaped with preg_quote() and anchored with "^" before being
   * handed to regex_replace(); backslashes in $replacement are doubled first.</p>
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // Literal search term, anchored to the start.
    $anchoredPattern = '^' . \preg_quote($search, '/');

    // Double the backslashes so they reach the result unchanged.
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6383
6384
  /**
   * Replaces $search with $replacement where it occurs at the ending of the string.
   *
   * <p>$search is escaped with preg_quote() and anchored with "$" before being
   * handed to regex_replace(); backslashes in $replacement are doubled first.</p>
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // Literal search term, anchored to the end.
    $anchoredPattern = \preg_quote($search, '/') . '$';

    // Double the backslashes so they reach the result unchanged.
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6401
6402
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text to put in its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The modified string, or $subject unchanged if $search was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstPos = self::strpos($subject, $search);

    // Nothing to replace.
    if ($firstPos === false) {
      return $subject;
    }

    // Swap exactly the matched range for the replacement.
    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
6420
6421
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text to put in its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The modified string, or $subject unchanged if $search was not found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $lastPos = self::strrpos($subject, $search);

    // Nothing to replace.
    if ($lastPos === false) {
      return $subject;
    }

    // Swap exactly the matched range for the replacement.
    return self::substr_replace($subject, $replace, $lastPos, self::strlen($search));
  }
6439
6440
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // Without this guard an empty input would build range(0, -1), i.e. the
    // bogus index list [0, -1], and rely on substr() tolerating those offsets.
    if ('' === $str) {
      return '';
    }

    // One index per character ...
    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // ... then re-assemble the characters in the shuffled order.
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6462
6463
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string   $str      <p>The input string.</p>
   * @param int      $start    <p>Initial index from which to begin extraction.</p>
   * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string   $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring; an empty string if a non-negative $end is
   *                     not behind $start.</p><p>Whatever substr() reports otherwise, which per
   *                     the original contract is <b>FALSE</b> if <i>str</i> is shorter than
   *                     <i>start</i> characters long.</p>
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    if ($end === null) {
      // A null length makes substr() return everything from $start onwards.
      $length = null;
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      // Translate the "from the end" index into a length, measuring the string in
      // the same encoding substr() works with.
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6492
6493
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * <p>Hyphens and whitespace become underscores, every upper-case ASCII letter is
   * lower-cased and prefixed with an underscore, every digit is surrounded by
   * underscores, and runs of underscores are collapsed and trimmed from both ends.</p>
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // NOTE: no "|" inside the character class - there it is a literal pipe,
        // not an alternation, and would make "|" characters match as well.
        '/([\dA-Z])/u',
        function ($matches) use ($encoding) {
          $match = $matches[1];
          $matchInt = (int)$match;

          // A digit survives the int round-trip and gets fenced off on both sides.
          if ((string)$matchInt == $match) {
            return '_' . $match . '_';
          }

          // Otherwise it is an upper-case letter that starts a new word.
          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    // A separate "trim whitespace" pattern is not needed here: once every
    // whitespace run has been turned into "_" there is no whitespace left.
    $str = (string)\preg_replace(
        [
            '/\s+/', // convert spaces to "_"
            '/_+/',  // remove double "_"
        ],
        [
            '_',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6540
6541
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice collapses duplicate values into a single entry each.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    // Turn the ordered code points back into a string.
    return self::string($codePoints);
  }
6566
6567
  /**
   * alias for "UTF8::split()"
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str <p>Forwarded to split() unchanged.</p>
   * @param int             $len <p>Forwarded to split() unchanged. Default: 1</p>
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    $chunks = self::split($str, $len);

    return $chunks;
  }
6581
6582
  /**
   * Splits the string at every occurrence of $pattern, returning an array of
   * strings. An optional integer $limit will truncate the results.
   *
   * <p>NOTE: $pattern is escaped with preg_quote() before it is used, so it is
   * matched literally and regex meta characters have no special meaning.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>The separator with which to split the string; an empty pattern returns [$str].</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return; 0 returns an empty array.
   *                        Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // this->split errors when supplied an empty pattern in < PHP 5.4.13
    // and current versions of HHVM (3.8 and below)
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the remaining unsplit string into the last slot when a
    // limit is given, so ask for one extra slot and drop it again afterwards.
    $pregLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pregLimit);
    if ($parts === false) {
      return [];
    }

    // Only a completely filled result carries the unwanted remainder.
    if ($pregLimit > 0 && \count($parts) === $pregLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6625
6626
  /**
   * Check if the string starts with the given substring.
   *
   * <p>The comparison is case-sensitive. An empty $haystack or an empty $needle
   * always yields false.</p>
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    return '' !== $haystack
           && '' !== $needle
           && \strpos($haystack, $needle) === 0;
  }
6646
6647
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with $substring.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Neither an empty string nor an empty candidate list can produce a match.
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      // The first matching candidate settles the question.
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6675
6676
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's $encoding, so that the offset is
    // measured the same way as the strlen() / substr() calls below.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // Skip the separator itself and return everything behind it.
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6707
6708
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's $encoding, so that the offset is
    // measured the same way as the strlen() / substr() calls below.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // Skip the separator itself and return everything behind it.
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6739
6740
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's $encoding, so that the offset is
    // measured the same way as the substr() call below.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // Everything from the start of the string up to (not including) the separator.
    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6771
6772
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's $encoding, so that the offset is
    // measured the same way as the substr() call below.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // Everything from the start of the string up to (not including) the separator.
    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6803
6804
  /**
   * Gets the part of $str starting at (or, with "$beforeNeedle", preceding) the first
   * occurrence of "$needle", as returned by UTF8::strstr().
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Passed through to UTF8::strstr(). Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string The matched part, or an empty string if $str / $needle is empty
   *                or UTF8::strstr() reports no match.
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // strstr() signals "not found" with false; this method always returns a string.
    return false === $found ? '' : $found;
  }
6836
6837
  /**
   * Gets the part of $str starting at (or, with "$beforeNeedle", preceding) the last
   * occurrence of "$needle", as returned by UTF8::strrchr().
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Passed through to UTF8::strrchr(). Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string The matched part, or an empty string if $str / $needle is empty
   *                or UTF8::strrchr() reports no match.
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // strrchr() signals "not found" with false; this method always returns a string.
    return false === $found ? '' : $found;
  }
6864
6865
  /**
   * Wraps $str in the given substring.
   *
   * @param string $str       <p>The string to wrap.</p>
   * @param string $substring <p>The substring to put in front of and behind $str.</p>
   *
   * @return string $substring, then $str, then $substring again.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6877
6878
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * A "word" is every run of non-whitespace characters. Each word is lower-cased
   * and then passed to str_upper_first(); words that are listed in $ignore
   * (exact, case-sensitive comparison) are returned untouched.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Use the same $encoding for both case conversions.
          return self::str_upper_first(
              self::strtolower($match[0], $encoding),
              $encoding
          );
        },
        $str
    );
  }
6907
6908
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * Processing order:
   *  1. trim; if the input contains no lowercase letter at all (according to
   *     has_lowercase()), lower-case everything first,
   *  2. main pass: keep paths / URLs / e-mails, lower-case small words,
   *     capitalize plain words, keep words with internal caps,
   *  3. re-capitalize small words at the start of the title / a sub-sentence,
   *  4. re-capitalize small words at the end of the title / an inserted sub-phrase,
   *  5. re-capitalize small words inside hyphenated compounds.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the
   *                         built-in small-word list and inserted into the regular expressions
   *                         unescaped, i.e. they are interpreted as regex fragments.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // An all-caps (or caseless) input is normalized to lower case first;
    // use the caller's $encoding like every other conversion in this method.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= static::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in this pattern tests for U+2026 ("…"), while its
    // inline comment talks about a hyphen - confirm which one is intended before changing it.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7052
7053
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of $str are read as one big-endian number and written in base 2
   * without leading zeros; an empty string (or a string of NUL bytes only)
   * yields "0".
   *
   * The conversion is done byte by byte instead of via \base_convert(), because
   * \base_convert() works on floats internally and silently loses precision
   * once the input is longer than a few bytes.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string A string consisting of "0" and "1" characters.
   */
  public static function str_to_binary(string $str): string
  {
    // 'C*' yields every byte as an unsigned integer (keys start at 1).
    $bytes = \unpack('C*', $str);
    if ($bytes === false || $bytes === []) {
      return '0';
    }

    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= \sprintf('%08b', $byte);
    }

    // Same shape as the former base_convert() output: no leading zeros.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7066
7067
  /**
   * Split a string into its lines.
   *
   * "\r\n", "\r" and "\n" are each treated as exactly one line break, so
   * consecutive line breaks produce empty entries (e.g. "a\n\nb" gives
   * ['a', '', 'b']), regardless of which line-ending style is used.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[] The lines; if either filter option is set, the result of
   *                  reduce_string_array() applied to them.
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // Alternation instead of "[\r\n]{1,2}": the latter swallowed "\n\n" (a blank
    // line) as one separator while keeping the blank line in "\r\n\r\n".
    $return = \preg_split("/\r\n|\r|\n/u", $str);

    // preg_split() fails e.g. on invalid UTF-8 because of the "u" modifier.
    if ($return === false) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $return;
    }

    return self::reduce_string_array(
        $return,
        $removeEmptyValues,
        $removeShortValues
    );
  }
7102
7103
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains the
   * matched words as well as the text found between them (words sit at the odd
   * indexes, see str_word_count()).
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $pieces = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($pieces === false) {
      return $fallback;
    }

    // No filtering requested -> hand back the raw split result.
    $filteringRequested = ($removeShortValues !== null || $removeEmptyValues !== false);
    if (!$filteringRequested) {
      return $pieces;
    }

    $filtered = self::reduce_string_array(
        $pieces,
        $removeEmptyValues,
        $removeShortValues
    );

    // Make sure every remaining entry is a string (keys are preserved).
    return \array_map('\strval', $filtered);
  }
7147
7148
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Forwarded to UTF8::to_ascii(). Default: '?'</p>
   * @param bool   $strict  <p>Forwarded to UTF8::to_ascii(). Default: false</p>
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7163
7164
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * If $substring itself is longer than a non-negative $length it cannot fit;
   * it is then left out and the string is simply cut to $length characters.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    $substringLength = (int)self::strlen($substring, $encoding);

    // A substring that is longer than the requested length would turn the
    // length passed to substr() negative ("cut from the end") and produce a
    // result longer than requested - drop the substring instead.
    if ($length >= 0 && $substringLength > $length) {
      $substring = '';
      $substringLength = 0;
    }

    // Need to further trim the string so we can append the substring
    $truncated = self::substr($str, 0, $length - $substringLength, $encoding);
    if ($truncated === false) {
      $truncated = '';
    }

    return $truncated . $substring;
  }
7197
7198
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * If $substring itself is longer than a non-negative $length it cannot fit;
   * it is then left out and only the word-safe truncation is applied.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    $substringLength = (int)self::strlen($substring, $encoding);

    // A substring that is longer than the requested length would turn the
    // length passed to substr() negative - drop the substring instead.
    if ($length >= 0 && $substringLength > $length) {
      $substring = '';
      $substringLength = 0;
    }

    // need to further trim the string so we can append the substring
    $length -= $substringLength;

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // if the last word was truncated
    // (a space exactly at position $length means the cut fell on a word boundary)
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace !== $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      if ($lastPos !== false || $strPosSpace !== false) {
        $truncated = (string)self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    return $truncated . $substring;
  }
7241
7242
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * Implemented as UTF8::str_delimit() with "_" as the delimiter.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7256
7257
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * Implemented as UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7271
7272
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to UTF8::ucfirst(). Default: false</p>
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7287
7288
  /**
   * Counts number of words in the UTF-8 string.
   *
   * Works on the result of UTF8::str_to_words(), in which the words sit at the
   * odd indexes and the text between them at the even indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $pieces = self::str_to_words($str, $charlist);
    $total = \count($pieces);

    // format 1: plain list of words
    if ($format === 1) {
      $words = [];
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $pieces[$idx];
      }

      return $words;
    }

    // format 2: words keyed by their character offset within $str
    if ($format === 2) {
      $wordsByOffset = [];
      $position = self::strlen($pieces[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $wordsByOffset[$position] = $pieces[$idx];
        // advance past the word and the separator text that follows it
        $position += self::strlen($pieces[$idx]) + self::strlen($pieces[$idx + 1]);
      }

      return $wordsByOffset;
    }

    // any other format: number of words
    return (int)(($total - 1) / 2);
  }
7331
7332
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * Both strings are run through UTF8::strtocasefold() and the results are
   * compared with UTF8::strcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7353
7354
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>Forwarded to UTF8::strstr(). Default: false</p>
   * @param string $encoding      [optional] <p>Default: UTF-8</p>
   * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::strstr(). Default: false</p>
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7371
7372
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings compare as equal right away; otherwise both strings are
   * normalized to NFD and compared with \strcmp(). If a string cannot be
   * normalized (\Normalizer::normalize() returns false, e.g. for invalid
   * UTF-8), it is compared as it is instead of raising a TypeError.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7391
7392
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start position; together with $length it is applied
   *                         via UTF8::substr() before searching.</p>
   * @param int    $length   [optional] <p>Length of the part of $str to examine.</p>
   *
   * @return int|null The number of characters before the first character from $charList
   *                  (the whole length if none occurs), or <strong>null</strong> if $charList
   *                  is empty, the selected part of $str is empty or UTF8::substr() fails.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // Lazily capture everything in front of the first character of $charList.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return (int)self::strlen($matches[1]);
    }

    // No character from $charList present: the whole string is the segment.
    return (int)self::strlen($str);
  }
7426
7427
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>Forwarded to UTF8::stristr(). Default: false</p>
   * @param string $encoding      [optional] <p>Default: UTF-8</p>
   * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::stristr(). Default: false</p>
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7444
7445
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted with UTF8::chr() and the results are concatenated
   * in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
7467
7468
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is compared against every BOM byte sequence that is a key of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // Only the keys (the BOM byte sequences) matter here, not the byte lengths.
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7487
7488
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Run the string through UTF8::clean() before stripping.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // Optionally sanitize the input first, then let PHP's native strip_tags() do the work.
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7520
7521
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * INFO: If the unicode-aware replacement fails (e.g. because the input is not valid UTF-8),
   *       only the single-byte ASCII whitespace is removed, instead of dropping the whole input.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input without whitespace.
   */
  public static function strip_whitespace(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);
    if ($stripped !== null) {
      return $stripped;
    }

    // "preg_replace()" returns null on error (e.g. malformed UTF-8 in combination with the "u" modifier),
    // casting that to string would silently throw away the complete input ... so fall back to a
    // byte-wise pattern: tab, line feed, vertical tab, form feed, carriage return and space
    return (string)\preg_replace('/[\x09-\x0D\x20]+/', '', $str);
  }
7538
7539
  /**
   * Case-insensitive search for the position of the first occurrence of a string within another string.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string that is searched.</p>
   * @param string $needle    <p>The string that is searched for.</p>
   * @param int    $offset    [optional] <p>The position in haystack where the search starts.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if haystack or needle is empty
   *                   or if needle is not found.
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle lead to a wrong position
      // from "mb_strpos()" and "iconv_strpos()", so remove them first
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    //
    // first choice: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $mbPosition = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($mbPosition !== false) {
        return $mbPosition;
      }
    }

    //
    // second choice: intl
    //
    // INFO: "grapheme_stripos()" can't handle other encodings and can't handle a negative offset
    //

    $intlIsUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($intlIsUsable) {
      $graphemePosition = \grapheme_stripos($haystack, $needle, $offset);
      if ($graphemePosition !== false) {
        return $graphemePosition;
      }
    }

    //
    // fallback for ascii only
    //

    $asciiOnly = self::is_ascii($haystack) && self::is_ascii($needle);
    if ($asciiOnly) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings and do a case-sensitive search
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7612
7613
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string
   *                      A sub-string,<br>or <strong>false</strong> if haystack or needle is empty
   *                      or if needle is not found.<br>
   *                      If the needle is empty after the optional cleanup, the (cleaned) haystack is returned.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: strict comparison on purpose, a loose "!$needle" check would also treat
    //       the valid needle "0" as empty and return the whole haystack
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first (case-insensitive) match
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // "self::strlen()" can return false, but "self::substr()" only accepts an integer offset
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
7691
7692
  /**
   * Count the characters of a string (not the bytes).
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             For the encodings "CP850" and "ASCII" the byte-length is returned.
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // invalid characters in $str lead to a wrong length
      // from "mb_strlen" and "\iconv_strlen", so remove them first
      $str = self::clean($str);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $encodingNeedsExtension = $encoding !== 'UTF-8';
    if (
        $encodingNeedsExtension
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $mbLength = \mb_strlen($str, $encoding);
      if ($mbLength !== false) {
        return $mbLength;
      }
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $iconvLength = \iconv_strlen($str, $encoding);
      if ($iconvLength !== false) {
        return $iconvLength;
      }
    }

    //
    // fallback via intl
    //
    // INFO: "grapheme_strlen()" can't handle other encodings
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $graphemeLength = \grapheme_strlen($str);
      if ($graphemeLength !== null) {
        return $graphemeLength;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // fallback via vanilla php: count the matches of "any single character"
    //

    \preg_match_all('/./us', $str, $chars);
    $charCount = \count($chars[0]);

    // nothing matched although the string is not empty -> the length can't be determined
    return ($charCount === 0 && isset($str[0])) ? false : $charCount;
  }
7807
7808
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int The byte-length, <strong>0</strong> for an empty string.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
7832
7833
  /**
   * Compare two strings case-insensitively with a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are case-folded first and then handed over to UTF8::strnatcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $foldedStr1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $foldedStr2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($foldedStr1, $foldedStr2);
  }
7854
7855
  /**
   * Compare two strings with a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings don't need to be folded at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7874
7875
  /**
   * Compare the first n characters of two strings, case-insensitively.
   *
   * Both strings are case-folded first and then handed over to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $foldedStr1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $foldedStr2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($foldedStr1, $foldedStr2, $len);
  }
7898
7899
  /**
   * Compare the first n characters of two strings.
   *
   * Both strings are cut via UTF8::substr() and then compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the string-cast turns a possible "false" from substr() into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7920
7921
  /**
   * Search a string for the first occurrence of any character out of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false
   *                      String starting from the character found,<br>
   *                      or <strong>false</strong> if haystack or char_list is empty or if nothing is found.
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the byte-position of the matched character
    $bytePosition = (int)\strpos($haystack, $hit[0]);

    return \substr($haystack, $bytePosition);
  }
7943
7944
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int
   *                              (a non-negative int is converted via UTF8::chr()).</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If haystack or needle is empty or if needle is not found it returns false.
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    if ($offset < 0) {
      // A negative offset counts from the end of the haystack: translate it into the equivalent
      // offset from the start, otherwise the result would be relative to the cut-off tail only.
      $haystackLength = self::strlen($haystack, $encoding);
      if ($haystackLength === false) {
        return false;
      }

      $offset = (int)\max(0, $haystackLength + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte-position of the needle within the remaining part of the haystack
    $pos = \strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos === 0) {
      return $offset;
    }

    // convert the byte-position into a character-position
    $charsBeforeNeedle = self::strlen(\substr($haystackTmp, 0, $pos), $encoding);
    if ($charsBeforeNeedle === false) {
      return false;
    }

    return $offset + $charsBeforeNeedle;
  }
8094
8095
  /**
   * Find the byte-position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to search for in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If haystack or needle is empty or if needle
   *                   is not found, it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
8128
8129
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * INFO: without mbstring only the first character of the needle is used for the search.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of haystack,<br>
   *                      or false if haystack or needle is empty or if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle lead to a wrong position
      // from "mb_strpos()" and "iconv_strpos()", so remove them first
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // first choice: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $isByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
    if ($before_needle === false && $isByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // fallback via iconv or via vanilla php: both only search for the first character of the needle
    //

    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
      return false;
    }
    $needle = (string)$firstNeedleChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPosition = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPosition = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPosition === false) {
      return false;
    }

    if ($before_needle) {
      return self::substr($haystack, 0, $lastPosition, $encoding);
    }

    return self::substr($haystack, $lastPosition, null, $encoding);
  }
8242
8243
  /**
   * Reverses characters order in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *                The string with characters in the reverse sequence,<br>
   *                or an empty string if the input is empty or its length can't be determined.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // "self::strlen()" can return false, so check that explicitly before it is used as a counter:
    // decrementing "false" has no effect and "self::substr()" only accepts an integer offset
    $length = self::strlen($str);
    if ($length === false) {
      return '';
    }

    // collect the characters one by one, starting with the last one
    $reversed = '';
    for ($i = $length - 1; $i >= 0; --$i) {
      $reversed .= (string)self::substr($str, $i, 1);
    }

    return $reversed;
  }
8264
8265
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * INFO: without mbstring only the first character of the needle is used for the search.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of haystack or<br>
   *                      false if haystack or needle is empty or if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle lead to a wrong position
      // from "mb_strpos()" and "iconv_strpos()", so remove them first
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    //
    // first choice: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php: search only for the first character of the needle
    //

    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
      return false;
    }
    $needle = (string)$firstNeedleChar;

    $lastPosition = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPosition === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPosition, $encoding)
        : self::substr($haystack, $lastPosition, null, $encoding);
  }
8335
8336
  /**
   * Case-insensitive lookup of the last occurrence of a needle inside a string.
   *
   * @param string     $haystack  <p>The string to search in.</p>
   * @param string|int $needle    <p>The string to search for; a non-negative int is converted via UTF8::chr()
   *                              first.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from haystack and needle first.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br><strong>false</strong> if needle is not found, or if haystack or needle is empty.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: delegate to the byte-wise search
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // first choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // second choice: intl
    // INFO: "grapheme_strripos()" can't handle other encodings and can't handle a negative offset
    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // third choice: both strings are ascii only, so the byte-wise search is enough
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    // last resort via vanilla php: case-fold both strings, then search case-sensitively
    return self::strrpos(
        self::strtocasefold($haystack, true, false, $encoding),
        self::strtocasefold($needle, true, false, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8441
8442
  /**
   * Byte-wise, case-insensitive lookup of the last occurrence of a needle inside a string.
   *
   * @param string $haystack <p>
   *                         The string from which to get the position of the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack
   *                         to start searching.
   *                         </p>
   *
   * @return int|false Return the numeric position of the last occurrence of needle in the
   *                   haystack string, or false if needle is not found or one of the strings is empty.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with the 8-BIT charset "CP850",
    // otherwise the native function is called directly
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
  }
8477
8478
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.<br><strong>null</strong> (the default) is handed to the
   *                              underlying search functions as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      // strrpos_in_byte() declares "int $offset", so the default null must be
      // converted here, otherwise "strict_types" turns this call into a TypeError
      return self::strrpos_in_byte($haystack, $needle, (int)$offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // never hand null to the int parameter of mb_strrpos()
      return \mb_strrpos($haystack, $needle, (int)$offset, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // from here on a missing offset simply means "0"
    $offset = (int)$offset;

    // cut the haystack down to the part that is allowed to be searched
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position inside the (maybe shortened) haystack
    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped prefix
    return $offset + self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8613
8614
  /**
   * Byte-wise lookup of the last occurrence of a needle inside a string.
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found or one of the strings is empty, it returns false.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with the 8-BIT charset "CP850",
    // otherwise the native function is called directly
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
  }
8648
8649
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>If non-zero, only the part returned by UTF8::substr() is inspected.</p>
   * @param int|null $length [optional] <p>If not null, only the part returned by UTF8::substr() is inspected.</p>
   *
   * @return int
   *             Length of the matching initial segment, 0 if nothing matches or if input / mask is empty.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the input string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      // no match, or a pcre error (preg_match() returned false)
      return 0;
    }

    // the cast keeps the declared "int" return type even if the length lookup does not return an int
    return (int)self::strlen($matches[0]);
  }
8676
8677
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found or if haystack / needle
   *                       is empty.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // $match[1] captures (non-greedy) everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // the length lookup may fail with false; substr() only accepts an int offset,
    // so report the failure instead of running into a TypeError
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
8780
8781
  /**
   * Byte-wise lookup of the first occurrence of a string within another.
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end,
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found or one of the strings is empty.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with the 8-BIT charset "CP850",
    // otherwise the native function is called directly
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
  }
8820
8821
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through UTF8::fixStrCaseHelper() and then converted
   * via UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict boolean true selects the lowercase variant
    return $lower === true
        ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
  }
8860
8861
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // transliteration failed (false): returning it would violate the "string" return type,
        // so continue with the generic conversion below
      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8923
8924
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and afterwards every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   *                The transformed string; if the normalization or the regex replacement fails,
   *                the last successfully computed string is returned instead.
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $normalized = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($normalized === false) {
      // normalization failed: continue with the raw input instead of
      // passing "false" as subject into preg_replace()
      $normalized = $str;
    }

    $stripped = \preg_replace('/\p{Mn}+/u', '', $normalized);

    // preg_replace() returns null on error, which would violate the "string" return type
    return $stripped ?? $normalized;
  }
8936
8937
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language id is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // transliteration failed (false): returning it would violate the "string" return type,
        // so continue with the generic conversion below
      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8999
9000
  /**
   * Translate characters or replace sub-strings.
   *
   * With a $to value, both $from and $to are split via UTF8::str_split(), the longer list is cut down
   * to the size of the shorter one and the resulting pairs are handed to the native strtr().
   * Without a $to value, a string $from is removed from $str, anything else is handed
   * to the native strtr() as replace-pairs.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    // INF is the marker for "no $to given"
    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // both lists must have the same size, so cut them down to the shorter one
      $pairCount = \min(\count($fromChars), \count($toChars));
      $fromChars = \array_slice($fromChars, 0, $pairCount);
      $toChars = \array_slice($toChars, 0, $pairCount);

      return \strtr($str, \array_combine($fromChars, $toChars));
    }

    if (\is_string($from)) {
      return \str_replace($from, '', $str);
    }

    return \strtr($str, $from);
  }
9044
9045
  /**
   * Return the width of a string.
   *
   * Uses mb_strwidth() if mbstring is available. Otherwise every character matched by the
   * wide-character regex below counts as 2 and each remaining character as 1.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred way: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    // vanilla php way: the regex below needs an UTF-8 subject
    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip all wide characters and let preg_replace() count how many were removed
    $wideCount = 0;
    $narrowOnly = (string)preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    // wide characters count twice, everything that is left counts once
    return ($wideCount << 1) + self::strlen($narrowOnly, 'UTF-8');
  }
9095
9096
  /**
9097
   * Get part of a string.
9098
   *
9099
   * @link http://php.net/manual/en/function.mb-substr.php
9100
   *
9101
   * @param string $str       <p>The string being checked.</p>
9102
   * @param int    $offset    <p>The first position used in str.</p>
9103
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9104
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9105
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9106
   *
9107
   * @return string|false
9108
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9109
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9110
   *                      characters long, <b>FALSE</b> will be returned.
9111
   */
9112 394
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9113
  {
9114 394
    if ('' === $str) {
9115 19
      return '';
9116
    }
9117
9118
    // Empty string
9119 388
    if ($length === 0) {
9120 14
      return '';
9121
    }
9122
9123 385
    if ($cleanUtf8 === true) {
9124
      // iconv and mbstring are not tolerant to invalid encoding
9125
      // further, their behaviour is inconsistent with that of PHP's substr
9126 2
      $str = self::clean($str);
9127
    }
9128
9129
    // Whole string
9130 385
    if (!$offset && $length === null) {
9131 40
      return $str;
9132
    }
9133
9134 356
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9135 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9136
    }
9137
9138 356
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9139
      self::checkForSupport();
9140
    }
9141
9142
    //
9143
    // fallback for binary || ascii only
9144
    //
9145
9146
    if (
9147 356
        $encoding === 'CP850'
9148
        ||
9149 356
        $encoding === 'ASCII'
9150
    ) {
9151 5
      return self::substr_in_byte($str, $offset, $length);
9152
    }
9153
9154
    //
9155
    // fallback via mbstring
9156
    //
9157
9158 351
    if (self::$SUPPORT['mbstring'] === true) {
9159 351
      $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9160 351
      if ($return !== false) {
9161 351
        return $return;
9162
      }
9163
    }
9164
9165
    // otherwise we need the string-length and can't fake it via "2147483647"
9166 4
    $str_length = 0;
9167 4
    if ($offset || $length === null) {
9168 4
      $str_length = self::strlen($str, $encoding);
9169
    }
9170
9171
    // e.g.: invalid chars + mbstring not installed
9172 4
    if ($str_length === false) {
9173
      return false;
9174
    }
9175
9176
    // Empty string
9177 4
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9178
      return '';
9179
    }
9180
9181
    // Impossible
9182 4
    if ($offset && $offset > $str_length) {
9183
      // "false" is the php native return type here,
9184
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9185
      return '';
9186
9187
    }
9188
9189 4
    if ($length === null) {
9190 4
      $length = (int)$str_length;
9191
    } else {
9192 2
      $length = (int)$length;
9193
    }
9194
9195
    if (
9196 4
        $encoding !== 'UTF-8'
9197
        &&
9198 4
        self::$SUPPORT['mbstring'] === false
9199
    ) {
9200 2
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9201
    }
9202
9203
    //
9204
    // fallback via intl
9205
    //
9206
9207
    if (
9208 4
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9209
        &&
9210 4
        $offset >= 0 // grapheme_substr() can't handle negative offset
9211
        &&
9212 4
        self::$SUPPORT['intl'] === true
9213
    ) {
9214
      $returnTmp = \grapheme_substr($str, $offset, $length);
9215
      if ($returnTmp !== false) {
9216
        return $returnTmp;
9217
      }
9218
    }
9219
9220
    //
9221
    // fallback via iconv
9222
    //
9223
9224
    if (
9225 4
        $length >= 0 // "iconv_substr()" can't handle negative length
9226
        &&
9227 4
        self::$SUPPORT['iconv'] === true
9228
    ) {
9229
      $returnTmp = \iconv_substr($str, $offset, $length);
9230
      if ($returnTmp !== false) {
9231
        return $returnTmp;
9232
      }
9233
    }
9234
9235
    //
9236
    // fallback for ascii only
9237
    //
9238
9239 4
    if (self::is_ascii($str)) {
9240
      return \substr($str, $offset, $length);
9241
    }
9242
9243
    //
9244
    // fallback via vanilla php
9245
    //
9246
9247
    // split to array, and remove invalid characters
9248 4
    $array = self::split($str);
9249
9250
    // extract relevant part, and join to make string again
9251 4
    return \implode('', \array_slice($array, $offset, $length));
9252
  }
9253
9254
  /**
   * Comparison of two strings from an offset, up to length characters.
   *
   * When an offset or a length is given, $str1 is first reduced to the requested
   * part via UTF8::substr() and $str2 is cut to the character length of that part;
   * the two results are then compared via UTF8::strcmp() / UTF8::strcasecmp().
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, forwarded to
   *                                     UTF8::substr().</p>
   * @param int|null $length             [optional] <p>The length of the comparison, forwarded to
   *                                     UTF8::substr().</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // strlen() can return false; only truncate $str2 when the length is known,
      // because substr() accepts int|null only (strict_types would raise a TypeError).
      $str1Length = self::strlen($str1);
      if ($str1Length !== false) {
        $str2Tmp = self::substr($str2, 0, (int)$str1Length);
        if ($str2Tmp === false) {
          $str2Tmp = '';
        }
        $str2 = (string)$str2Tmp;
      }
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9297
9298
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. If omitted while an offset is given, the length of the
   *                             haystack (measured in $encoding) is used.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The number of occurrences; <strong>false</strong> if $haystack or $needle is empty,
   *                   if the haystack length cannot be determined, or (before PHP 7.1) if
   *                   offset + length is not positive.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // normalize first, so that every helper below works with the same encoding name
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the requested encoding, otherwise the slice below
        // could be cut too short for non UTF-8 input
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window, the counting below then needs no offset
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the regex matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9391
9392
  /**
   * Count the number of substring occurrences, working on bytes.
   *
   * @param string   $haystack <p>
   *                           The string being checked.
   *                           </p>
   * @param string   $needle   <p>
   *                           The string being found.
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   *
   * @return int|false
   *                   The number of times the needle substring occurs in the haystack string;
   *                   <strong>0</strong> if $haystack or $needle is empty.
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        ($offset || $length !== null)
        &&
        self::$SUPPORT['mbstring_func_overload'] === true
    ) {

      if ($length === null) {
        // NOTE(review): self::strlen() is called without an encoding here, so this is
        // presumably a character count rather than a byte count - confirm against strlen().
        $lengthTmp = self::strlen($haystack);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // "mb_substr_count()" has no offset / length, so slice the haystack beforehand
      $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // never hand "null" to the native function: with strict_types
    // this is a TypeError on PHP versions where $length is not nullable
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9466
9467
  /**
   * Counts how often $substring occurs in $str.
   *
   * The comparison is case-sensitive unless $caseSensitive is falsy; in that case
   * both strings are case-folded before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $substring || '' === $str) {
      return 0;
    }

    // API fallback: a non-boolean third argument is taken over as the encoding
    $isRealBool = ($caseSensitive === true || $caseSensitive === false);
    if ($isRealBool === false) {
      $encoding = (string)$caseSensitive;
    }

    if ((bool)$caseSensitive === false) {
      // fold both sides, so that the plain count below ignores the case
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9497
9498
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix; the unchanged haystack if it does not
   *                start with $needle, if $needle is empty, or if the length of $needle
   *                cannot be determined.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // strlen() can return false, but substr() needs an int offset (strict_types)
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, (int)$needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9526
9527
  /**
   * Returns a part of a string, with offset and length counted in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters; whatever the underlying "substr()" /
   *                      "mb_substr()" call returns is passed through.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to cut from, or nothing requested
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no limit: the whole string is the answer
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "no length" is emulated with the largest 32-bit integer
    $maxLength = $length ?? 2147483647;

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \substr($str, $offset, $maxLength);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
  }
9566
9567
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack if it does not end with $needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    // an empty haystack yields '', an empty needle leaves the haystack as it is
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $stripped = self::substr($haystack, 0, $keepLength);

    return $stripped === false ? '' : (string)$stripped;
  }
9595
9596
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix; the unchanged haystack if it does not
   *                start with $needle, if $needle is empty, or if the length of $needle
   *                cannot be determined.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // strlen() can return false, but substr() needs an int offset (strict_types)
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, (int)$needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9624
9625
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * For an array of strings, replacement / offset / length are expanded to one value
   * per string and the method calls itself for every entry.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, it defaults to
   *                                          the length of that string; i.e. end the replacing at the end of string.
   *                                          If length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // one replacement per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $num)
          : \array_pad([$replacement], $num, $replacement);

      // one offset per input string, everything that is not an integer counts as 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $num)
        );
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // one length per input string
      if (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($num) {
              if (null === $value) {
                return 0;
              }

              // non-integer entries fall back to the number of input strings
              return (int)$value === $value ? $value : $num;
            },
            \array_slice($length, 0, $num)
        );
      } elseif (null === $length) {
        $length = \array_fill(0, $num, 0);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // single string: only the first entry of a replacement array is used
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // for pure ASCII input the native function is used
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $strLength = self::strlen($str);
      if ($strLength === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$strLength;
    }

    // swap the requested characters, then glue everything together again
    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
9731
9732
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack if it does not end with $needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    // an empty haystack yields '', an empty needle leaves the haystack as it is
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $stripped = self::substr($haystack, 0, $keepLength);

    return $stripped === false ? '' : (string)$stripped;
  }
9760
9761
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: "lower ^ upper" holds the bits that differ between both cases,
    // XOR-ing that with the input flips exactly those bits
    // NOTE(review): presumably relies on both variants having the same byte length as $str - confirm
    $swapped = $lower ^ $upper ^ $str;

    return (string)$swapped;
  }
9788
9789
  /**
   * Checks whether "mbstring" or "iconv" functions are provided without the matching extension.
   *
   * A function like "mb_strlen()" or "iconv()" that exists although its extension is not loaded
   * must have been defined in userland (presumably by a polyfill such as "symfony/polyfill").
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" exists without ext-mbstring or
   *              "iconv()" exists without ext-iconv, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    if (\extension_loaded('mbstring') === false && \function_exists('mb_strlen') === true) {
      return true;
    }

    return \extension_loaded('iconv') === false && \function_exists('iconv') === true;
  }
9812
9813
  /**
   * Replaces every tab character with $tabLength spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces used per tab. Default: 4</p>
   *
   * @return string
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $indent = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $indent, $str);
  }
9823
9824
  /**
   * Title-cases the string via "mb_convert_case()" with MB_CASE_TITLE.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // the two encodings that are used as-is, everything else gets normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9842
9843
  /**
   * Deprecated alias, forwards all arguments unchanged to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9860
9861
  /**
   * Deprecated alias, forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9876
9877
  /**
   * Deprecated alias, forwards the argument unchanged to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9892
9893
  /**
   * Deprecated alias, forwards the argument unchanged to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9908
9909
  /**
9910
   * Convert a string into ASCII.
9911
   *
9912
   * @param string $str     <p>The input string.</p>
9913
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9914
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9915
   *                        performance</p>
9916
   *
9917
   * @return string
9918
   */
9919 38
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9920
  {
9921 38
    static $UTF8_TO_ASCII;
9922
9923 38
    if ('' === $str) {
9924 3
      return '';
9925
    }
9926
9927
    // check if we only have ASCII, first (better performance)
9928 35
    if (self::is_ascii($str) === true) {
9929 7
      return $str;
9930
    }
9931
9932 30
    $str = self::clean(
9933 30
        $str,
9934 30
        true,
9935 30
        true,
9936 30
        true,
9937 30
        false,
9938 30
        true,
9939 30
        true
9940
    );
9941
9942
    // check again, if we only have ASCII, now ...
9943 30
    if (self::is_ascii($str) === true) {
9944 12
      return $str;
9945
    }
9946
9947 19
    if ($strict === true) {
9948
9949 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9950
        self::checkForSupport();
9951
      }
9952
9953 1
      if (self::$SUPPORT['intl'] === true) {
9954
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
9955
        /** @noinspection PhpComposerExtensionStubsInspection */
9956 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
9957
9958
        // check again, if we only have ASCII, now ...
9959 1
        if (self::is_ascii($str) === true) {
9960 1
          return $str;
9961
        }
9962
9963
      }
9964
    }
9965
9966 19
    if (self::$ORD === null) {
9967
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9968
    }
9969
9970 19
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
9971 19
    $chars = $ar[0];
9972 19
    $ord = null;
9973 19
    foreach ($chars as &$c) {
9974
9975 19
      $ordC0 = self::$ORD[$c[0]];
9976
9977 19
      if ($ordC0 >= 0 && $ordC0 <= 127) {
9978 15
        continue;
9979
      }
9980
9981 19
      $ordC1 = self::$ORD[$c[1]];
9982
9983
      // ASCII - next please
9984 19
      if ($ordC0 >= 192 && $ordC0 <= 223) {
9985 17
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
9986
      }
9987
9988 19
      if ($ordC0 >= 224) {
9989 8
        $ordC2 = self::$ORD[$c[2]];
9990
9991 8
        if ($ordC0 <= 239) {
9992 7
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
9993
        }
9994
9995 8
        if ($ordC0 >= 240) {
9996 2
          $ordC3 = self::$ORD[$c[3]];
9997
9998 2
          if ($ordC0 <= 247) {
9999 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
10000
          }
10001
10002 2
          if ($ordC0 >= 248) {
10003
            $ordC4 = self::$ORD[$c[4]];
10004
10005
            if ($ordC0 <= 251) {
10006
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
10007
            }
10008
10009
            if ($ordC0 >= 252) {
10010
              $ordC5 = self::$ORD[$c[5]];
10011
10012
              if ($ordC0 <= 253) {
10013
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10014
              }
10015
            }
10016
          }
10017
        }
10018
      }
10019
10020 19
      if ($ordC0 === 254 || $ordC0 === 255) {
10021
        $c = $unknown;
10022
        continue;
10023
      }
10024
10025 19
      if ($ord === null) {
10026
        $c = $unknown;
10027
        continue;
10028
      }
10029
10030 19
      $bank = $ord >> 8;
10031 19
      if (!isset($UTF8_TO_ASCII[$bank])) {
10032 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10033 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10034 2
          $UTF8_TO_ASCII[$bank] = [];
10035
        }
10036
      }
10037
10038 19
      $newchar = $ord & 255;
10039
10040 19
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10041
10042
        // keep for debugging
10043
        /*
10044
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10045
        echo "char: " . $c . "\n";
10046
        echo "ord: " . $ord . "\n";
10047
        echo "newchar: " . $newchar . "\n";
10048
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10049
        echo "bank:" . $bank . "\n\n";
10050
        */
10051
10052 18
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10053
      } else {
10054
10055
        // keep for debugging missing chars
10056
        /*
10057
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10058
        echo "char: " . $c . "\n";
10059
        echo "ord: " . $ord . "\n";
10060
        echo "newchar: " . $newchar . "\n";
10061
        echo "bank:" . $bank . "\n\n";
10062
        */
10063
10064 19
        $c = $unknown;
10065
      }
10066
    }
10067
10068 19
    return \implode('', $chars);
10069
  }
10070
10071
  /**
   * Interpret a value as a boolean.
   *
   * The value is cast to string first. An empty string is false; the
   * (case-insensitive) words "true", "1", "on", "yes" are true and
   * "false", "0", "off", "no" are false. Any other numeric string is true
   * only if it is greater than zero. Everything else is cast to bool after
   * being passed through "UTF8::trim()".
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;
    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $normalized = \strtolower($value);

    if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      return (float)$value > 0;
    }

    return (bool)self::trim($value);
  }
10110
10111
  /**
   * Convert given string to safe filename (and keep string case).
   *
   * Every character that is not a dot, a hyphen, an ASCII letter or digit,
   * whitespace or part of $fallback_char is removed. Runs of whitespace are
   * replaced by $fallback_char, repeated fallback characters are collapsed
   * and finally the fallback characters are trimmed from both ends.
   *
   * @param string $string            <p>The input string.</p>
   * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe characters
   *                                  are simply removed. If true, the string is passed through
   *                                  "UTF8::str_transliterate()" first.</p>
   * @param string $fallback_char     <p>Replacement for whitespace; may be an empty string.</p>
   *
   * @return string
   */
  public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
  {
    if ($use_transliterate === true) {
      $string = self::str_transliterate($string, $fallback_char);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $patterns = [
        '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
        '/[\s]+/',                                            // 2) convert spaces to $fallback_char
    ];
    $replacements = [
        '',
        $fallback_char,
    ];

    // An empty fallback would produce the invalid pattern "/[]+/"; "preg_replace()"
    // then fails for the whole pattern list and the result would be an empty string.
    if ($fallback_char !== '') {
      $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
      $replacements[] = $fallback_char;
    }

    $string = (string)\preg_replace($patterns, $replacements, $string);

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
  }
10146
10147
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively, keys are kept);
   * any other value is cast to string and passed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $text = (string)$str;

    return $text === '' ? '' : self::utf8_decode($text);
  }
10171
10172
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
10185
10186
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $converted = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // 0x80-0xBF is a stray continuation byte and needs conversion,
        // everything below is kept as it is
        $converted .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
        $pos++;

        continue;
      }

      // number of bytes the lead byte announces (0 = doesn't look like UTF8 at all)
      if ($lead <= "\xDF") {
        $sequenceLength = 2;
      } elseif ($lead <= "\xEF") {
        $sequenceLength = 3;
      } elseif ($lead <= "\xF7") {
        $sequenceLength = 4;
      } else {
        $sequenceLength = 0;
      }

      // every announced trailing byte must be a continuation byte (0x80-0xBF),
      // bytes beyond the end of the string count as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $sequenceLength > 0;
      for ($offset = 1; $looksLikeUtf8 && $offset < $sequenceLength; $offset++) {
        $trail = $pos + $offset >= $byteCount ? "\x00" : $str[$pos + $offset];
        $looksLikeUtf8 = $trail >= "\x80" && $trail <= "\xBF";
        $sequence .= $trail;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $converted .= $sequence;
        $pos += $sequenceLength;
      } else {
        // not valid UTF8 - convert the lead byte only
        $converted .= self::to_utf8_convert_helper($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $converted = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $converted
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $converted = self::html_entity_decode($converted);
    }

    return $converted;
  }
10295
10296
  /**
   * Convert a single non-UTF-8 byte into its UTF-8 representation.
   *
   * The byte is looked up in the lazily loaded "ord" table. If the
   * "win1252_to_utf8" table has an entry for the resulting value, that entry
   * is returned; otherwise a two-byte sequence is built from the byte.
   *
   * @param int|string $input <p>The byte to convert (used as key of the "ord" table).</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // The division yields a float; cast it explicitly, because using a float
    // with a fractional part as array offset is deprecated since PHP 8.1.
    $cc1 = self::$CHR[(int)($ordC1 / 64)] | "\xC0";
    // byte-wise string operators: keep the low 6 bits and set the continuation marker
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10329
10330
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without $chars (or with an empty value) all leading and trailing characters
   * matching "[\pZ\pC]" are removed. The string "0" is a valid character list
   * and strips zeros.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a perfectly valid list of chars to strip
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10359
10360
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the rest; only the
   * first character is passed to "UTF8::strtoupper()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // "mb_strpos()" and "iconv_strpos()" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    $rest = self::substr($input, 1, null, $encoding);
    $firstChar = (string)self::substr($input, 0, 1, $encoding);

    $upperFirst = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    // "substr()" may report false, e.g. when there is nothing after the first char
    return $upperFirst . ($rest === false ? '' : $rest);
  }
10390
10391
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10406
10407
  /**
   * Uppercase for all words in the string.
   *
   * Pure ASCII input without exceptions and without extra word characters is
   * handled by PHP's native "ucwords()". Otherwise the string is split via
   * "UTF8::str_to_words()" and every word that is not listed in $exceptions is
   * passed through "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // strict comparison: the string "0" is not empty and must not be dropped
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // compare against '' instead of casting to bool, so that a charlist or
    // an exception of "0" is not mistaken for "nothing given"
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = [];

    $useExceptions = \count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts only, a word like "0" has to stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10475
10476
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, so that the
    // entity-decoding below can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str) === 1) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $decoded = $str;

    // one decoding pass at least; with $multi_decode the passes are repeated
    // until the string does not change anymore
    while (true) {
      $before = $decoded;

      $asUtf8 = self::to_utf8($before);
      $withoutEntities = self::html_entity_decode($asUtf8, ENT_QUOTES | ENT_HTML5);
      $decoded = self::fix_simple_utf8(\urldecode($withoutEntities));

      if ($multi_decode !== true || $decoded === $before) {
        break;
      }
    }

    return $decoded;
  }
10524
10525
  /**
   * Return an array that maps "urlencoded" win1252 sequences ("%20" ... "%FF")
   * to their UTF-8 replacement.
   *
   * The table is a plain literal: the keys are the percent-encoded bytes in
   * ascending order, the values are the characters they are replaced with.
   *
   * NOTE(review): several values ("%7F", "%81", "%8D", "%8F", "%90", "%9D",
   * "%A0", "%AD") show up as empty strings here; they may contain invisible
   * characters in the real file - verify before editing the table.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
10532 2
  public static function urldecode_fix_win1252_chars(): array
10533
  {
10534
    return [
10535 2
        '%20' => ' ',
10536
        '%21' => '!',
10537
        '%22' => '"',
10538
        '%23' => '#',
10539
        '%24' => '$',
10540
        '%25' => '%',
10541
        '%26' => '&',
10542
        '%27' => "'",
10543
        '%28' => '(',
10544
        '%29' => ')',
10545
        '%2A' => '*',
10546
        '%2B' => '+',
10547
        '%2C' => ',',
10548
        '%2D' => '-',
10549
        '%2E' => '.',
10550
        '%2F' => '/',
10551
        '%30' => '0',
10552
        '%31' => '1',
10553
        '%32' => '2',
10554
        '%33' => '3',
10555
        '%34' => '4',
10556
        '%35' => '5',
10557
        '%36' => '6',
10558
        '%37' => '7',
10559
        '%38' => '8',
10560
        '%39' => '9',
10561
        '%3A' => ':',
10562
        '%3B' => ';',
10563
        '%3C' => '<',
10564
        '%3D' => '=',
10565
        '%3E' => '>',
10566
        '%3F' => '?',
10567
        '%40' => '@',
10568
        '%41' => 'A',
10569
        '%42' => 'B',
10570
        '%43' => 'C',
10571
        '%44' => 'D',
10572
        '%45' => 'E',
10573
        '%46' => 'F',
10574
        '%47' => 'G',
10575
        '%48' => 'H',
10576
        '%49' => 'I',
10577
        '%4A' => 'J',
10578
        '%4B' => 'K',
10579
        '%4C' => 'L',
10580
        '%4D' => 'M',
10581
        '%4E' => 'N',
10582
        '%4F' => 'O',
10583
        '%50' => 'P',
10584
        '%51' => 'Q',
10585
        '%52' => 'R',
10586
        '%53' => 'S',
10587
        '%54' => 'T',
10588
        '%55' => 'U',
10589
        '%56' => 'V',
10590
        '%57' => 'W',
10591
        '%58' => 'X',
10592
        '%59' => 'Y',
10593
        '%5A' => 'Z',
10594
        '%5B' => '[',
10595
        '%5C' => '\\',
10596
        '%5D' => ']',
10597
        '%5E' => '^',
10598
        '%5F' => '_',
10599
        '%60' => '`',
10600
        '%61' => 'a',
10601
        '%62' => 'b',
10602
        '%63' => 'c',
10603
        '%64' => 'd',
10604
        '%65' => 'e',
10605
        '%66' => 'f',
10606
        '%67' => 'g',
10607
        '%68' => 'h',
10608
        '%69' => 'i',
10609
        '%6A' => 'j',
10610
        '%6B' => 'k',
10611
        '%6C' => 'l',
10612
        '%6D' => 'm',
10613
        '%6E' => 'n',
10614
        '%6F' => 'o',
10615
        '%70' => 'p',
10616
        '%71' => 'q',
10617
        '%72' => 'r',
10618
        '%73' => 's',
10619
        '%74' => 't',
10620
        '%75' => 'u',
10621
        '%76' => 'v',
10622
        '%77' => 'w',
10623
        '%78' => 'x',
10624
        '%79' => 'y',
10625
        '%7A' => 'z',
10626
        '%7B' => '{',
10627
        '%7C' => '|',
10628
        '%7D' => '}',
10629
        '%7E' => '~',
10630
        '%7F' => '',
10631
        // NOTE(review): Windows-1252 defines 0x80 as the euro sign, not a backtick - confirm this mapping is intended
        '%80' => '`',
10632
        '%81' => '',
10633
        '%82' => '‚',
10634
        '%83' => 'ƒ',
10635
        '%84' => '„',
10636
        '%85' => '…',
10637
        '%86' => '†',
10638
        '%87' => '‡',
10639
        '%88' => 'ˆ',
10640
        '%89' => '‰',
10641
        '%8A' => 'Š',
10642
        '%8B' => '‹',
10643
        '%8C' => 'Œ',
10644
        '%8D' => '',
10645
        '%8E' => 'Ž',
10646
        '%8F' => '',
10647
        '%90' => '',
10648
        '%91' => '‘',
10649
        '%92' => '’',
10650
        '%93' => '“',
10651
        '%94' => '”',
10652
        '%95' => '•',
10653
        '%96' => '–',
10654
        '%97' => '—',
10655
        '%98' => '˜',
10656
        '%99' => '™',
10657
        '%9A' => 'š',
10658
        '%9B' => '›',
10659
        '%9C' => 'œ',
10660
        '%9D' => '',
10661
        '%9E' => 'ž',
10662
        '%9F' => 'Ÿ',
10663
        '%A0' => '',
10664
        '%A1' => '¡',
10665
        '%A2' => '¢',
10666
        '%A3' => '£',
10667
        '%A4' => '¤',
10668
        '%A5' => '¥',
10669
        '%A6' => '¦',
10670
        '%A7' => '§',
10671
        '%A8' => '¨',
10672
        '%A9' => '©',
10673
        '%AA' => 'ª',
10674
        '%AB' => '«',
10675
        '%AC' => '¬',
10676
        '%AD' => '',
10677
        '%AE' => '®',
10678
        '%AF' => '¯',
10679
        '%B0' => '°',
10680
        '%B1' => '±',
10681
        '%B2' => '²',
10682
        '%B3' => '³',
10683
        '%B4' => '´',
10684
        '%B5' => 'µ',
10685
        '%B6' => '¶',
10686
        '%B7' => '·',
10687
        '%B8' => '¸',
10688
        '%B9' => '¹',
10689
        '%BA' => 'º',
10690
        '%BB' => '»',
10691
        '%BC' => '¼',
10692
        '%BD' => '½',
10693
        '%BE' => '¾',
10694
        '%BF' => '¿',
10695
        '%C0' => 'À',
10696
        '%C1' => 'Á',
10697
        '%C2' => 'Â',
10698
        '%C3' => 'Ã',
10699
        '%C4' => 'Ä',
10700
        '%C5' => 'Å',
10701
        '%C6' => 'Æ',
10702
        '%C7' => 'Ç',
10703
        '%C8' => 'È',
10704
        '%C9' => 'É',
10705
        '%CA' => 'Ê',
10706
        '%CB' => 'Ë',
10707
        '%CC' => 'Ì',
10708
        '%CD' => 'Í',
10709
        '%CE' => 'Î',
10710
        '%CF' => 'Ï',
10711
        '%D0' => 'Ð',
10712
        '%D1' => 'Ñ',
10713
        '%D2' => 'Ò',
10714
        '%D3' => 'Ó',
10715
        '%D4' => 'Ô',
10716
        '%D5' => 'Õ',
10717
        '%D6' => 'Ö',
10718
        '%D7' => '×',
10719
        '%D8' => 'Ø',
10720
        '%D9' => 'Ù',
10721
        '%DA' => 'Ú',
10722
        '%DB' => 'Û',
10723
        '%DC' => 'Ü',
10724
        '%DD' => 'Ý',
10725
        '%DE' => 'Þ',
10726
        '%DF' => 'ß',
10727
        '%E0' => 'à',
10728
        '%E1' => 'á',
10729
        '%E2' => 'â',
10730
        '%E3' => 'ã',
10731
        '%E4' => 'ä',
10732
        '%E5' => 'å',
10733
        '%E6' => 'æ',
10734
        '%E7' => 'ç',
10735
        '%E8' => 'è',
10736
        '%E9' => 'é',
10737
        '%EA' => 'ê',
10738
        '%EB' => 'ë',
10739
        '%EC' => 'ì',
10740
        '%ED' => 'í',
10741
        '%EE' => 'î',
10742
        '%EF' => 'ï',
10743
        '%F0' => 'ð',
10744
        '%F1' => 'ñ',
10745
        '%F2' => 'ò',
10746
        '%F3' => 'ó',
10747
        '%F4' => 'ô',
10748
        '%F5' => 'õ',
10749
        '%F6' => 'ö',
10750
        '%F7' => '÷',
10751
        '%F8' => 'ø',
10752
        '%F9' => 'ù',
10753
        '%FA' => 'ú',
10754
        '%FB' => 'û',
10755
        '%FC' => 'ü',
10756
        '%FD' => 'ý',
10757
        '%FE' => 'þ',
10758
        '%FF' => 'ÿ',
10759
    ];
10760
  }
10761
10762
  /**
10763
   * Decodes an UTF-8 string to ISO-8859-1.
10764
   *
10765
   * @param string $str <p>The input string.</p>
10766
   * @param bool   $keepUtf8Chars
10767
   *
10768
   * @return string
10769
   */
10770 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10771
  {
10772 14
    if ('' === $str) {
10773 5
      return '';
10774
    }
10775
10776 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10777 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10778
10779 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10780
10781 1
      if (self::$WIN1252_TO_UTF8 === null) {
10782
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10783
      }
10784
10785 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10785
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10786 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10786
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10787
    }
10788
10789
    /** @noinspection PhpInternalEntityUsedInspection */
10790 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10791
10792 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10793
      self::checkForSupport();
10794
    }
10795
10796
    // save for later comparision
10797 14
    $str_backup = $str;
10798 14
    $len = self::strlen_in_byte($str);
10799
10800 14
    if (self::$ORD === null) {
10801
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10802
    }
10803
10804 14
    if (self::$CHR === null) {
10805
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10806
    }
10807
10808 14
    $noCharFound = '?';
10809
    /** @noinspection ForeachInvariantsInspection */
10810 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10811 14
      switch ($str[$i] & "\xF0") {
10812 14
        case "\xC0":
10813 12
        case "\xD0":
10814 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10815 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10816 14
          break;
10817
10818
        /** @noinspection PhpMissingBreakStatementInspection */
10819 12
        case "\xF0":
10820
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10821 12
        case "\xE0":
10822 10
          $str[$j] = $noCharFound;
10823 10
          $i += 2;
10824 10
          break;
10825
10826
        default:
10827 12
          $str[$j] = $str[$i];
10828
      }
10829
    }
10830
10831 14
    $return = self::substr_in_byte($str, 0, $j);
10832 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10833
      $return = '';
10834
    }
10835
10836
    if (
10837 14
        $keepUtf8Chars === true
10838
        &&
10839 14
        self::strlen($return) >= self::strlen($str_backup)
10840
    ) {
10841 2
      return $str_backup;
10842
    }
10843
10844 14
    return $return;
10845
  }
10846
10847
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>The result of the native "utf8_encode()" is post-processed with the
   * "win1252_to_utf8" table, but only if it contains a 0xC2 byte.</p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfill of "utf8_encode()" may return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($encoded === false) {
      return '';
    }

    // without a 0xC2 byte there is nothing to replace
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are built once and then reused
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      $table = self::$WIN1252_TO_UTF8;
      if ($table === null) {
        $table = self::getData('win1252_to_utf8');
        self::$WIN1252_TO_UTF8 = $table;
      }

      $searchCache = \array_keys($table);
      $replaceCache = \array_values($table);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
10887
10888
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10901
10902
  /**
   * Returns the table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[]
   *                 The whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10917
10918
  /**
   * Cuts a string down to a maximum number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>The string that is appended if the input was shortened.</p>
   *
   * @return string <p>The input itself if it has no more than $limit words, otherwise the shortened string.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by up to $limit words incl. their trailing whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    // nothing matched: hand the input back untouched
    if (!isset($matches[0])) {
      return $str;
    }

    $head = $matches[0];

    // the match covers the whole input: nothing was cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
10949
10950
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>The input is turned into a single-byte "mask" (one byte per character), the native
   * "wordwrap()" decides on that mask where to break, and the result is then rebuilt from
   * the original characters.</p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column, or an empty string
   *                if $str or $break is empty.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    // $mask holds one byte per entry of $chars: "#" for an existing break,
    // " " for a space and "?" for every other character
    $mask = '';
    $chars = [];

    foreach (\explode($break, $str) as $segmentIndex => $segment) {
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where to break, using "#" as break marker
    $wrappedMask = \wordwrap($mask, $width, '#', $cut);

    $result = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    while (true) {
      $breakPos = self::strpos($wrappedMask, '#', $breakPos + 1);
      if ($breakPos === false) {
        break;
      }

      // copy the characters in front of the break marker
      ++$maskPos;
      while ($maskPos < $breakPos) {
        $result .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskPos;
      }

      // the marker stands for an existing break or a space: drop that character,
      // a marker that was inserted by "$cut" does not consume a character
      if (\in_array($chars[$charIndex], [$break, ' '], true)) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $result .= $break;
    }

    // whatever is left belongs to the last line
    return $result . \implode('', $chars);
  }
11021
  /**
11022
   * Line-Wrap the string after $limit, but also after the next word.
11023
   *
11024
   * @param string $str
11025
   * @param int    $limit
11026
   *
11027
   * @return string
11028
   */
11029 1
  public static function wordwrap_per_line(string $str, int $limit): string
11030
  {
11031 1
    $strings = (array)\preg_split('/\\r\\n|\\r|\\n/', $str);
11032
11033 1
    $string = '';
11034 1
    foreach ($strings as $value) {
11035 1
      if ($value === false) {
11036
        continue;
11037
      }
11038
11039 1
      $string .= wordwrap($value, $limit);
11040 1
      $string .= "\n";
11041
    }
11042
11043 1
    return $string;
11044
  }
11045
11046
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return string[] <p>The White Space characters as values, keyed by their numeric code point.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11055
11056
11057
}
11058