Passed
Push — master ( 81b950...991a03 )
by Lars
05:03 queued 01:44
created

UTF8::str_longest_common_prefix()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 9
nc 3
nop 3
dl 0
loc 16
ccs 9
cts 9
cp 1
crap 3
rs 9.9666
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Creates an instance and makes sure the environment detection
   * (see UTF8::checkForSupport()) has been executed.
   */
  public function __construct()
  {
    // populate the feature flags before anything else touches them
    self::checkForSupport();
  }
233
234
  /**
   * Fetch the character at a given position, similar to $str[1] but character-wise.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The zero-based position of the character to return.</p>
   *
   * @return string The single (multi-byte) character, or an empty string
   *                if $str is empty or $pos is negative.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string or before its start
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Make sure the string starts with a BOM.
   *
   * INFO: A string that already has a BOM (according to UTF8::string_has_bom())
   *       is returned unchanged, otherwise the UTF-8 BOM is prepended.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string, starting with a BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    // keep the input as-is unless the check explicitly reports "no BOM"
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The target length (string length + requested padding) and the matching
   * STR_PAD_* mode are computed here; the actual padding is delegated to
   * UTF8::str_pad().
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // NOTE: $encoding now has a default; a required parameter after optional
    //       ones is deprecated since PHP 8.0 and made the defaults unusable.
    $strlen = self::strlen($str, $encoding);

    if ($left && !$right) {
      $length = $left + $strlen;
      $type = STR_PAD_LEFT;
    } elseif (!$left && $right) {
      $length = $right + $strlen;
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all (length stays unchanged)
      $length = ($left + $right) + $strlen;
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Changes the case of all keys in an array.
   *
   * Keys that become identical after the conversion overwrite each other;
   * the value of the last such key wins.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value
   *                     is treated as <strong>CASE_LOWER</strong>.</p>
   *
   * @return array An array with its keys lower or uppercased.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything that is not explicitly CASE_UPPER falls back to lower-casing
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $key => $value) {
      $newKey = $toUpper ? self::strtoupper($key) : self::strtolower($key);
      $converted[$newKey] = $value;
    }

    return $converted;
  }
338
339
  /**
   * Returns the substring enclosed by $start and $end, or an empty string if
   * one of the delimiters is missing or nothing lies between them. An optional
   * offset may be supplied from which to begin the search for the start string.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // first character after the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $to = self::strpos($str, $end, $from, $encoding);
    if ($to === false || $to === $from) {
      // no end delimiter, or the end delimiter directly follows the start delimiter
      return '';
    }

    $enclosed = self::substr($str, $from, $to - $from, $encoding);

    return $enclosed === false ? '' : $enclosed;
  }
377
378
  /**
   * Convert a binary representation (a sequence of "0" and "1") into a string.
   *
   * The digits are processed byte-wise instead of going through base_convert(),
   * so inputs of any length are converted without losing precision. The bit
   * sequence is left-padded with zeros to whole bytes, so a value whose leading
   * zero bits were dropped (e.g. "1010") still decodes to the intended byte ("\x0a").
   *
   * @param mixed $bin <p>A string (or integer) consisting of the digits 1|0;
   *                   all other characters are ignored.</p>
   *
   * @return string The decoded bytes, or an empty string for empty / all-zero /
   *                unsupported input.
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) && !\is_int($bin)) {
      return '';
    }

    // keep binary digits only and drop leading zeros (they carry no value)
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 bits so that every chunk is a full byte
    $paddedLength = (int)(\ceil(\strlen($bits) / 8) * 8);
    $bits = \str_pad($bits, $paddedLength, '0', STR_PAD_LEFT);

    $result = '';
    foreach (\str_split($bits, 8) as $byte) {
      $result .= \chr((int)\bindec($byte));
    }

    return $result;
  }
398
399
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
410
411
  /**
   * @alias of UTF8::chr_map()
   *
   * Simply forwards both arguments to UTF8::chr_map() and returns its result.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * The lookup is a one-character UTF8::substr() call whose result is cast to string.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
   * Splits the string into its characters (chunks of length 1 via UTF8::str_split()).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    $characters = self::str_split($str, 1);

    return $characters;
  }
451
452
  /**
   * Detects which UTF-8 related extensions / features are available and stores
   * the result in self::$SUPPORT. The detection runs only once.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // already detected -> nothing to do
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    // mark as checked first, the flags are filled in right below
    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    // the transliterator id list is only available with a real "intl" extension
    if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype: http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo: http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json: http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // pcre (UTF-8 mode): http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Generates a character from the given code point (UTF-8 by default).
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding". Code points up to 127 are
   * read from the lookup table, everything else is built via IntlChar (if
   * available) or assembled byte by byte from the lookup table.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // memo: "<code point><encoding>" => generated character
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // only these encodings can be produced without the mbstring extension
    $isNativeEncoding = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if (!$isNativeEncoding && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // use "simple"-char only until "\x80"
    $isSimpleChar = ($code_point <= 127);
    $useIntlChar = !$isSimpleChar && self::$SUPPORT['intlChar'] === true;

    // the lookup table is needed for every path except the IntlChar one
    if (!$useIntlChar && self::$CHR === null) {
      $table = self::getData('chr');
      if ($table) {
        self::$CHR = (array)$table;
      }
    }

    if ($isSimpleChar) {
      $chr = self::$CHR[$code_point];
    } elseif ($useIntlChar) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
    } else {
      // manual UTF-8 encoding: lead byte followed by 10xxxxxx continuation bytes
      $code_point = (int)$code_point;

      if ($code_point <= 0x7F) {
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    // the character was generated as UTF-8, convert it if something else was requested
    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
601
602
  /**
   * Applies a callback to every character of a string.
   *
   * The string is split via UTF8::split() and the pieces are passed through array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Generates an array with the byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $characters = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // with "mbstring.func_overload" the native strlen() is replaced,
    // so the byte length has to be taken via UTF8::strlen_in_byte()
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteLength = static function ($data) {
        return UTF8::strlen_in_byte($data);
      };
    } else {
      $byteLength = '\strlen';
    }

    return \array_map($byteLength, $characters);
  }
652
653
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character; the payload
   * bits of the lead byte and of the following bytes are then combined.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $lead = self::ord($char[0]);

    // 0xxxxxxx -> single byte, the value is the code point
    if (!($lead & 0x80)) {
      return $lead;
    }

    if (($lead & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $code = $lead & ~0xc0;
    } elseif (($lead & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $code = $lead & ~0xe0;
    } elseif (($lead & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $code = $lead & ~0xf0;
    } else {
      // unknown lead byte: treated as a single byte, value stays untouched
      $length = 1;
      $code = $lead;
    }

    // 10xxxxxx -> append the payload of every following byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
691
692
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx, or an empty string for empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the HTML entity "&#0;" is handed to UTF8::ord() as an empty string
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * Forwards the character to UTF8::chr_to_decimal() and returns its result.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * INFO: The chunks are joined via implode(), so $end is placed between the
   *       chunks only and is not appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The optional steps run in this order: diamond question mark, invisible
   * characters, whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, the other groups match single invalid bytes
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keep only group 1, i.e. drop every byte that matched as "invalid"
    $cleaned = (string)\preg_replace($validSequences, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    // remove all non-UTF-8 symbols and additionally ...
    return self::clean(
        $fixed,
        true,  // ... remove BOM
        true,  // ... normalize whitespace chars
        false, // ... but do not normalize MS Word chars
        true,  // ... keep non-breaking-spaces
        true,  // ... remove diamond question mark (�)
        true   // ... remove invisible characters (e.g. "\0")
    );
  }
846
847
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a plain string is split into its characters first, arrays are used as they are
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if (\count($codePoints) === 0) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    // convert every integer code point into its "U+xxxx" notation
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
891
892
  /**
   * Trims the string and replaces consecutive whitespace characters with a
   * single space. This includes tabs and newline characters, as well as
   * multibyte whitespace such as the thin space and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    // first condense every whitespace run into one space, then trim the result
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
907
908
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
921
922
  /**
   * Remove css media-queries.
   *
   * Every "@media ... { ... }" block matched by the pattern is replaced by an empty string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function css_stripe_media_queries(string $str): string
  {
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
937
938
  /**
   * Checks whether the "ctype" extension is loaded on the server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
   * decoded via UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
   * Decodes a MIME header field
   *
   * iconv_mime_decode() is used when the "iconv" extension is available,
   * otherwise the string is passed to mb_decode_mimeheader().
   *
   * @param string $str
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] === true) {
      // keep decoding even if parts of the header are malformed
      return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    // strict comparison, consistent with the encoding check at the top of this method
    if ($encoding !== 'UTF-8') {
      $str = self::encode($encoding, $str);
    }

    return \mb_decode_mimeheader($str);
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()" this method also tries to repair broken / double encoded input,
   *        so calling it on an already UTF-8 encoded string should not mess up that string.
   *
   * The pseudo-encodings "BASE64" and "HTML-ENTITIES" / "HTML" are handled before any charset conversion.
   *
   * @param string      $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string      $str                    <p>The input string</p>
   * @param bool        $autodetectFromEncoding [optional] <p><strong>true</strong>: always run the encoding
   *                                            detection on the input and prefer its result over $fromEncoding;
   *                                            if nothing is detected the result of to_utf8() is returned.<br>
   *                                            <strong>false</strong>: only detect the encoding when no
   *                                            $fromEncoding is given.</p>
   * @param string|null $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc. ... otherwise
   *                                            we will autodetect the encoding anyway</p>
   *
   * @return string
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, $fromEncoding = null): string
  {
    // nothing to convert or no target given
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    // "UTF-8" and "CP850" are taken as-is, every other name gets normalized
    if (!\in_array($toEncoding, ['UTF-8', 'CP850'], true)) {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && !\in_array($fromEncoding, ['UTF-8', 'CP850'], true)) {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // source and target are the same -> done
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // base64 as source: decode and let the detection below find the real charset
    if ($fromEncoding === 'BASE64') {
      $str = base64_decode($str);
      $fromEncoding = null;
    }

    if ($toEncoding === 'BASE64') {
      return base64_encode($str);
    }

    if ($toEncoding === 'HTML-ENTITIES' || $toEncoding === 'HTML') {

      if ($fromEncoding === 'HTML-ENTITIES' || $fromEncoding === 'HTML') {
        $fromEncoding = null;
      }

      // html_encode() gets UTF-8 input, so convert first if the source is not known to be UTF-8
      if ($fromEncoding !== 'UTF-8') {
        $str = self::encode('UTF-8', $str, false, $fromEncoding);
      }

      return self::html_encode($str, true, $toEncoding);
    }

    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_entity_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = 'UTF-8';
    }

    $detectedEncoding = ($autodetectFromEncoding === true || !$fromEncoding)
        ? self::str_detect_encoding($str)
        : false;

    if ($detectedEncoding !== false) {
      // a detected encoding always wins over the given one
      $fromEncoding = $detectedEncoding;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    // dedicated converters for the common single-byte <-> UTF-8 cases
    if (
        $toEncoding === 'UTF-8'
        &&
        \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
    ) {
      return self::to_iso8859($str);
    }

    if (
        !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // NOTE(review): in autodetect-mode the *target* encoding is passed as the source charset,
      //               so the detected encoding is not used here - confirm that this is intended.
      $mbSourceEncoding = $autodetectFromEncoding === true ? $toEncoding : $fromEncoding;

      // info: do not use the symfony polyfill here
      $converted = \mb_convert_encoding($str, $toEncoding, $mbSourceEncoding);

      // an empty / falsy result falls through to the iconv attempt below
      if ($converted) {
        return $converted;
      }
    }

    $iconvResult = \iconv($fromEncoding, $toEncoding . '//IGNORE', $str);

    // last resort: hand back the string as it is
    return $iconvResult !== false ? $iconvResult : $str;
  }
1144
1145
  /**
   * Encodes a string as MIME header field via "iconv_mime_encode()" (with an empty field name).
   *
   * @param string $str
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
   * @param int    $indent           [optional] <p>Set the max length indent (passed as "line-length").</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // "UTF-8" and "CP850" are used as given, other charset names are normalized
    $knownCharsets = ['UTF-8', 'CP850'];

    if (!\in_array($fromCharset, $knownCharsets, true)) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if (!\in_array($toCharset, $knownCharsets, true)) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1188
1189
  /**
   * Create an extract from a sentence, so if the search-string was found, it tries to be centered in the output.
   *
   * Without a search-string the extract is cut from the start of the input at a " " / "." boundary
   * located at or after ($length - 1).
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // characters that are stripped from the cut edges of the extract
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // WARNING: the min() / max() calls below receive the raw position lookups, which
    //          presumably are false when nothing is found; min(false, <int>) is false and
    //          becomes 0 once cast. The resulting output depends on that, so keep it as it is.

    if (empty($search)) {

      $strLength = self::strlen($str, $encoding);

      $end = 0;
      if ($length > 0) {
        $end = ($length - 1) > $strLength ? $strLength : ($length - 1);
      }

      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      // no usable boundary -> return the whole string
      if (!$pos) {
        return $str;
      }

      $strSub = self::substr($str, 0, $pos, $encoding);
      if ($strSub === false) {
        return '';
      }

      return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // start at the last " " / "." before the window that is centered on the search-string
    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    $strLength = self::strlen($str, $encoding);

    // NOTE: a hit at position 0 is falsy as well and therefore takes the second path (unchanged on purpose)
    if ($wordPos && $halfSide > 0) {

      $l = $pos_start + $length - 1;
      if ($l > $strLength) {
        $l = $strLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // no end boundary: take the rest of the string; the length is measured in
        // the same $encoding that substr() uses (it was measured without $encoding before)
        $strSub = self::substr($str, $pos_start, $strLength, $encoding);
        if ($strSub === false) {
          return '';
        }

        return $replacerForSkippedText . \ltrim($strSub, $trimChars);
      }

      $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
      if ($strSub === false) {
        return '';
      }

      return $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
    }

    // search-string not usable for centering: cut from the start of the input
    $l = $length - 1;
    if ($l > $strLength) {
      $l = $strLength;
    }

    $pos_end = \min(
        self::strpos($str, ' ', $l, $encoding),
        self::strpos($str, '.', $l, $encoding)
    );

    if (!$pos_end) {
      return $str;
    }

    $strSub = self::substr($str, 0, $pos_end, $encoding);
    if ($strSub === false) {
      return '';
    }

    return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
  }
1310
1311
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * NOTE(review): the filename is passed through FILTER_SANITIZE_STRING, which is deprecated
   *               as of PHP 8.1 and alters names that contain tags or quotes.
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Whether the include path is searched for the file.
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If null (and $timeout is set), a
   *                                        context that only carries the "http" timeout is created.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts (null is treated as 0).
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc. ... an empty
   *                                        string means that the encoding is detected</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // only build a default context if the caller did not provide one
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // the length argument may only be passed if it is really set
    $data = $maxLength !== null
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    $isConvertible = self::is_binary($data, true) !== true
                     ||
                     self::is_utf16($data) !== false
                     ||
                     self::is_utf32($data) !== false;

    if ($isConvertible) {
      $data = self::encode('UTF-8', $data, false, $fromEncoding);
      $data = self::cleanup($data);
    }

    return $data;
  }
1405
1406
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read and then handed to string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    if ($content !== false) {
      return self::string_has_bom($content);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1425
1426
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to the "Normalizer" methods.</p>
   * @param string $leading_combining  <p>Prefix that is put in front of strings starting with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string was already normalized
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // normalization gave nothing usable -> try to repair the encoding instead
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1489
1490
  /**
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name, optionally filters it and passes the result through "filter()".
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when it was explicitly passed by the caller.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really supplied it
    $rawValue = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($rawValue);
  }
1528
1529
  /**
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables, optionally filters them and passes the result through "filter()".
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
   *               set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // with only $type given, the native defaults for the definition are used
    $rawValues = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($rawValues);
  }
1574
1575
  /**
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Filters a variable with a specified filter and passes the result through "filter()".
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>
   *                        Value to filter.
   *                        </p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        Only forwarded when it was explicitly passed by the caller.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filter that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // for filter that only accept flags, you can also pass as an array
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
   *                        // callback filter: 'foo' receives the value and returns the filtered value
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really supplied it
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1643
1644
  /**
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets multiple variables, optionally filters them and passes the result through "filter()".
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter, flags and options.
   *                          If the value is an array, valid keys are filter
   *                          which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // with only $data given, the native defaults for the definition are used
    $filtered = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
1686
1687
  /**
   * Tells whether the "finfo" class is available on this server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function finfo_loaded(): bool
  {
    $isAvailable = \class_exists('finfo');

    return $isAvailable;
  }
1697
1698
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the start; values <= 0 give an empty string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    $head = self::substr($str, 0, $n, $encoding);

    // substr() may signal a failure with false, callers always get a string
    return $head === false ? '' : $head;
  }
1720
1721
  /**
   * Check whether the string length (via "UTF8::strlen()") does not exceed the given size.
   *
   * @param string $str      The string to be measured.
   * @param int    $box_size The maximum allowed number of chars.
   *
   * @return bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1733
1734
  /**
   * Apply the case-folding replacement tables to a string.
   *
   * @param string $str
   * @param bool   $useLower     <p>
   *                             If exactly <strong>true</strong>, the "upper" entries are replaced by the
   *                             "lower" entries; otherwise the "lower" entries are replaced by the "upper" ones.
   *                             </p>
   * @param bool   $fullCaseFold <p>Also apply the "caseFolding_full" data table (not only the common cases).</p>
   *
   * @return string
   */
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    // Only a strict boolean true selects the "to lower" direction.
    $toLower = ($useLower === true);

    $commonUpper = self::$COMMON_CASE_FOLD['upper'];
    $commonLower = self::$COMMON_CASE_FOLD['lower'];

    $str = $toLower
        ? (string)\str_replace($commonUpper, $commonLower, $str)
        : (string)\str_replace($commonLower, $commonUpper, $str);

    if (!$fullCaseFold) {
      return $str;
    }

    // The full table is loaded once and kept for later calls.
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    if ($toLower) {
      return (string)\str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
    }

    return (string)\str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
  }
1776
1777
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacement table is read from the "utf8_fix" data file. If that table
   * cannot be loaded, the input string is returned unchanged.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Keys and values of the replacement table, split once and reused on later calls.
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // "getData()" returns false when the data file is missing; only store a real array.
        $fixTable = self::getData('utf8_fix');
        if (\is_array($fixTable)) {
          self::$BROKEN_UTF8_FIX = $fixTable;
        }
      }

      // Without a usable table, "array_keys()" / "array_values()" must not be called.
      if (!\is_array(self::$BROKEN_UTF8_FIX)) {
        return $str;
      }

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1811
1812
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept).
   * A string is decoded and re-encoded repeatedly until it no longer changes.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str)) {
      $fixed = [];
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $current = (string)$str;

    // Repeat "decode + to_utf8" until a pass leaves the string untouched.
    // An empty string never enters the loop.
    for ($previous = ''; $previous !== $current;) {
      $previous = $current;
      $current = self::to_utf8(self::utf8_decode($current, true));
    }

    return $current;
  }
1842
1843
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" is flagged as available, its direction code is used first;
   * when that code is in neither known list (or "IntlChar" is not available),
   * the decision is made from a built-in table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }

      // any other code: fall through to the table lookup below
    }

    $c = static::chr_to_decimal($char);

    // Everything outside the span covered by the table is left-to-right.
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    // Individual right-to-left code points (compared strictly).
    $rtlSingles = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    // Inclusive [first, last] ranges of right-to-left code points.
    $rtlRanges = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1965
1966
  /**
   * Load data from "/data/*.php".
   *
   * The file "data/<$file>.php" (relative to this class file) is included via
   * "require" and whatever that file returns is handed back to the caller.
   *
   * @param string $file <p>File name without directory and without the ".php" extension.</p>
   *
   * @return mixed|false Will return false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1983
1984
  /**
   * Report which php features were detected by the support check.
   *
   * @param string|null $key
   *
   * @return mixed
   *               The full support-"array", if $key === null<br>
   *               the stored value, if $key is used and set<br>
   *               otherwise <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    // Make sure the support information has been collected once.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown keys yield null.
    return self::$SUPPORT[$key] ?? null;
  }
2010
2011
  /**
   * Guess a file type from the first two bytes of the given data.
   *
   * The decimal values of both bytes are concatenated (e.g. "%P" => 37 . 80 => 3780)
   * and the resulting number is looked up in a list of known signatures.
   *
   * @param string $str
   *
   * @return string[]
   *                  <p>
   *                  Array with the keys "ext" and "type": both are empty strings if fewer than
   *                  two bytes are available, and both are "???" for an unknown signature.
   *                  </p>
   */
  private static function get_file_type($str)
  {
    $noInfo = ['ext' => '', 'type' => ''];

    if ('' === $str) {
      return $noInfo;
    }

    $firstTwoBytes = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($firstTwoBytes) !== 2) {
      return $noInfo;
    }

    $bytes = \unpack("C2chars", $firstTwoBytes);
    $typeCode = (int)($bytes['chars1'] . $bytes['chars2']);

    // signature code => file extension (all known signatures are binary formats)
    $knownSignatures = [
        3780   => 'pdf',
        7790   => 'exe',
        7784   => 'midi',
        8075   => 'zip',
        8297   => 'rar',
        255216 => 'jpg',
        7173   => 'gif',
        6677   => 'bmp',
        13780  => 'png',
    ];

    if (isset($knownSignatures[$typeCode])) {
      return ['ext' => $knownSignatures[$typeCode], 'type' => 'binary'];
    }

    return ['ext' => '???', 'type' => '???'];
  }
2078
2079
  /**
   * Build a random string by picking characters from a given set.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The random string; empty if $possibleChars has a length of 0.</p>
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $charCount = self::strlen($possibleChars, $encoding);
    if ($charCount === 0) {
      return '';
    }

    $result = '';

    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $index = \random_int(0, $charCount - 1);
      } catch (\Exception $e) {
        // "random_int()" threw an Exception: fall back to "mt_rand()"
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $charCount - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
2112
2113
  /**
   * Create a unique identifier string.
   *
   * A helper string is assembled from "mt_rand()", the session id, the remote / server
   * address (if set) and the extra entropy; it is used as prefix for "uniqid()".
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $uniqueHelper = \implode(
        '',
        [
            \mt_rand(),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ]
    );

    $uniqueString = \uniqid($uniqueHelper, true);

    return $md5 ? \md5($uniqueString . $uniqueHelper) : $uniqueString;
  }
2135
2136
  /**
   * Deprecated alias: forwards to "UTF8::string_has_bom()".
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2151
2152
  /**
   * Check whether the string contains at least one lower case char.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $lowerCasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowerCasePattern);
  }
2163
2164
  /**
   * Check whether the string contains at least one upper case char.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $upperCasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $upperCasePattern);
  }
2175
2176
  /**
   * Convert a hexadecimal value into a character via "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    // hexadecimal string => decimal number
    $decimal = \hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2187
2188
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (case-insensitive). Anything else, including letters outside of a-f, is a failure.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Only real hex digits are allowed: "intval()" would silently turn
    // something like "zzzz" into 0 instead of reporting a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2212
2213
  /**
   * Alias: forwards all arguments to "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2228
2229
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * With mbstring available the conversion is done by "mb_encode_numericentity()",
   * otherwise every character is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // Start at 0x80 to leave ASCII untouched, otherwise encode from 0x00 on.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The conversion map must consist of groups of exactly four values
      // (start, end, offset, mask); PHP 8 rejects any other element count.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2282
2283
  /**
2284
   * UTF-8 version of html_entity_decode()
2285
   *
2286
   * The reason we are not using html_entity_decode() by itself is because
2287
   * while it is not technically correct to leave out the semicolon
2288
   * at the end of an entity most browsers will still interpret the entity
2289
   * correctly. html_entity_decode() does not convert entities without
2290
   * semicolons, so we are left with our own little solution here. Bummer.
2291
   *
2292
   * Convert all HTML entities to their applicable characters
2293
   *
2294
   * INFO: opposite to UTF8::html_encode()
2295
   *
2296
   * @link http://php.net/manual/en/function.html-entity-decode.php
2297
   *
2298
   * @param string $str      <p>
2299
   *                         The input string.
2300
   *                         </p>
2301
   * @param int    $flags    [optional] <p>
2302
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2303
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2304
   *                         <table>
2305
   *                         Available <i>flags</i> constants
2306
   *                         <tr valign="top">
2307
   *                         <td>Constant Name</td>
2308
   *                         <td>Description</td>
2309
   *                         </tr>
2310
   *                         <tr valign="top">
2311
   *                         <td><b>ENT_COMPAT</b></td>
2312
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2313
   *                         </tr>
2314
   *                         <tr valign="top">
2315
   *                         <td><b>ENT_QUOTES</b></td>
2316
   *                         <td>Will convert both double and single quotes.</td>
2317
   *                         </tr>
2318
   *                         <tr valign="top">
2319
   *                         <td><b>ENT_NOQUOTES</b></td>
2320
   *                         <td>Will leave both double and single quotes unconverted.</td>
2321
   *                         </tr>
2322
   *                         <tr valign="top">
2323
   *                         <td><b>ENT_HTML401</b></td>
2324
   *                         <td>
2325
   *                         Handle code as HTML 4.01.
2326
   *                         </td>
2327
   *                         </tr>
2328
   *                         <tr valign="top">
2329
   *                         <td><b>ENT_XML1</b></td>
2330
   *                         <td>
2331
   *                         Handle code as XML 1.
2332
   *                         </td>
2333
   *                         </tr>
2334
   *                         <tr valign="top">
2335
   *                         <td><b>ENT_XHTML</b></td>
2336
   *                         <td>
2337
   *                         Handle code as XHTML.
2338
   *                         </td>
2339
   *                         </tr>
2340
   *                         <tr valign="top">
2341
   *                         <td><b>ENT_HTML5</b></td>
2342
   *                         <td>
2343
   *                         Handle code as HTML 5.
2344
   *                         </td>
2345
   *                         </tr>
2346
   *                         </table>
2347
   *                         </p>
2348
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2349
   *
2350
   * @return string The decoded string.
2351
   */
2352 38
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Fewer than 4 bytes cannot hold an entity worth decoding.
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Nothing to do without "&", or without both "&#" and ";".
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    // The support information must be available before "self::$SUPPORT" is read below.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Decode repeatedly until a pass no longer changes the string
    // (handles multiple levels of encoding).
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        // The conversion map must consist of groups of exactly four values
        // (start, end, offset, mask); PHP 8 rejects any other element count.
        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // Quotes stay encoded here; "html_entity_decode()" below handles them according to $flags.
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (the regex first appends the missing ";" to numeric entities)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2440
2441
  /**
   * Escape a string for html output via "UTF8::htmlspecialchars()",
   * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2457
2458
  /**
   * Strip html-tags without content.
   *
   * e.g.: <tag></tag>
   *
   * A match is an opening tag (containing neither "/" nor ">"), followed by
   * optional whitespace only, followed by any closing tag.
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2475
2476
  /**
2477
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2478
   *
2479
   * @link http://php.net/manual/en/function.htmlentities.php
2480
   *
2481
   * @param string $str           <p>
2482
   *                              The input string.
2483
   *                              </p>
2484
   * @param int    $flags         [optional] <p>
2485
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2486
   *                              invalid code unit sequences and the used document type. The default is
2487
   *                              ENT_COMPAT | ENT_HTML401.
2488
   *                              <table>
2489
   *                              Available <i>flags</i> constants
2490
   *                              <tr valign="top">
2491
   *                              <td>Constant Name</td>
2492
   *                              <td>Description</td>
2493
   *                              </tr>
2494
   *                              <tr valign="top">
2495
   *                              <td><b>ENT_COMPAT</b></td>
2496
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2497
   *                              </tr>
2498
   *                              <tr valign="top">
2499
   *                              <td><b>ENT_QUOTES</b></td>
2500
   *                              <td>Will convert both double and single quotes.</td>
2501
   *                              </tr>
2502
   *                              <tr valign="top">
2503
   *                              <td><b>ENT_NOQUOTES</b></td>
2504
   *                              <td>Will leave both double and single quotes unconverted.</td>
2505
   *                              </tr>
2506
   *                              <tr valign="top">
2507
   *                              <td><b>ENT_IGNORE</b></td>
2508
   *                              <td>
2509
   *                              Silently discard invalid code unit sequences instead of returning
2510
   *                              an empty string. Using this flag is discouraged as it
2511
   *                              may have security implications.
2512
   *                              </td>
2513
   *                              </tr>
2514
   *                              <tr valign="top">
2515
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2516
   *                              <td>
2517
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2518
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2519
   *                              </td>
2520
   *                              </tr>
2521
   *                              <tr valign="top">
2522
   *                              <td><b>ENT_DISALLOWED</b></td>
2523
   *                              <td>
2524
   *                              Replace invalid code points for the given document type with a
2525
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2526
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2527
   *                              instance, to ensure the well-formedness of XML documents with
2528
   *                              embedded external content.
2529
   *                              </td>
2530
   *                              </tr>
2531
   *                              <tr valign="top">
2532
   *                              <td><b>ENT_HTML401</b></td>
2533
   *                              <td>
2534
   *                              Handle code as HTML 4.01.
2535
   *                              </td>
2536
   *                              </tr>
2537
   *                              <tr valign="top">
2538
   *                              <td><b>ENT_XML1</b></td>
2539
   *                              <td>
2540
   *                              Handle code as XML 1.
2541
   *                              </td>
2542
   *                              </tr>
2543
   *                              <tr valign="top">
2544
   *                              <td><b>ENT_XHTML</b></td>
2545
   *                              <td>
2546
   *                              Handle code as XHTML.
2547
   *                              </td>
2548
   *                              </tr>
2549
   *                              <tr valign="top">
2550
   *                              <td><b>ENT_HTML5</b></td>
2551
   *                              <td>
2552
   *                              Handle code as HTML 5.
2553
   *                              </td>
2554
   *                              </tr>
2555
   *                              </table>
2556
   *                              </p>
2557
   * @param string $encoding      [optional] <p>
2558
   *                              Like <b>htmlspecialchars</b>,
2559
   *                              <b>htmlentities</b> takes an optional third argument
2560
   *                              <i>encoding</i> which defines encoding used in
2561
   *                              conversion.
2562
   *                              Although this argument is technically optional, you are highly
2563
   *                              encouraged to specify the correct value for your code.
2564
   *                              </p>
2565
   * @param bool   $double_encode [optional] <p>
2566
   *                              When <i>double_encode</i> is turned off PHP will not
2567
   *                              encode existing html entities. The default is to convert everything.
2568
   *                              </p>
2569
   *
2570
   *
2571
   * @return string The encoded string.
2572
   * </p>
2573
   * <p>
2574
   * If the input <i>string</i> contains an invalid code unit
2575
   * sequence within the given <i>encoding</i> an empty string
2576
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2577
   * <b>ENT_SUBSTITUTE</b> flags are set.
2578
   */
2579 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given; every other name is normalized first.
    $isFastPathEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isFastPathEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    // The native htmlentities() leaves backslashes untouched, so each one is
    // turned into its numeric entity here before the final encoding pass.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace('\\', '&#92;', $encoded);

    return self::html_encode($encoded, true, $encoding);
  }
2599
2600
  /**
2601
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2602
   *
2603
   * INFO: Take a look at "UTF8::htmlentities()"
2604
   *
2605
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2606
   *
2607
   * @param string $str           <p>
2608
   *                              The string being converted.
2609
   *                              </p>
2610
   * @param int    $flags         [optional] <p>
2611
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2612
   *                              invalid code unit sequences and the used document type. The default is
2613
   *                              ENT_COMPAT | ENT_HTML401.
2614
   *                              <table>
2615
   *                              Available <i>flags</i> constants
2616
   *                              <tr valign="top">
2617
   *                              <td>Constant Name</td>
2618
   *                              <td>Description</td>
2619
   *                              </tr>
2620
   *                              <tr valign="top">
2621
   *                              <td><b>ENT_COMPAT</b></td>
2622
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2623
   *                              </tr>
2624
   *                              <tr valign="top">
2625
   *                              <td><b>ENT_QUOTES</b></td>
2626
   *                              <td>Will convert both double and single quotes.</td>
2627
   *                              </tr>
2628
   *                              <tr valign="top">
2629
   *                              <td><b>ENT_NOQUOTES</b></td>
2630
   *                              <td>Will leave both double and single quotes unconverted.</td>
2631
   *                              </tr>
2632
   *                              <tr valign="top">
2633
   *                              <td><b>ENT_IGNORE</b></td>
2634
   *                              <td>
2635
   *                              Silently discard invalid code unit sequences instead of returning
2636
   *                              an empty string. Using this flag is discouraged as it
2637
   *                              may have security implications.
2638
   *                              </td>
2639
   *                              </tr>
2640
   *                              <tr valign="top">
2641
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2642
   *                              <td>
2643
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2644
   *                              U+FFFD (UTF-8) or &#38;#xFFFD; (otherwise) instead of returning an empty string.
2645
   *                              </td>
2646
   *                              </tr>
2647
   *                              <tr valign="top">
2648
   *                              <td><b>ENT_DISALLOWED</b></td>
2649
   *                              <td>
2650
   *                              Replace invalid code points for the given document type with a
2651
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#xFFFD;
2652
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2653
   *                              instance, to ensure the well-formedness of XML documents with
2654
   *                              embedded external content.
2655
   *                              </td>
2656
   *                              </tr>
2657
   *                              <tr valign="top">
2658
   *                              <td><b>ENT_HTML401</b></td>
2659
   *                              <td>
2660
   *                              Handle code as HTML 4.01.
2661
   *                              </td>
2662
   *                              </tr>
2663
   *                              <tr valign="top">
2664
   *                              <td><b>ENT_XML1</b></td>
2665
   *                              <td>
2666
   *                              Handle code as XML 1.
2667
   *                              </td>
2668
   *                              </tr>
2669
   *                              <tr valign="top">
2670
   *                              <td><b>ENT_XHTML</b></td>
2671
   *                              <td>
2672
   *                              Handle code as XHTML.
2673
   *                              </td>
2674
   *                              </tr>
2675
   *                              <tr valign="top">
2676
   *                              <td><b>ENT_HTML5</b></td>
2677
   *                              <td>
2678
   *                              Handle code as HTML 5.
2679
   *                              </td>
2680
   *                              </tr>
2681
   *                              </table>
2682
   *                              </p>
2683
   * @param string $encoding      [optional] <p>
2684
   *                              Defines encoding used in conversion.
2685
   *                              </p>
2686
   *                              <p>
2687
   *                              For the purposes of this function, the encodings
2688
   *                              ISO-8859-1, ISO-8859-15,
2689
   *                              UTF-8, cp866,
2690
   *                              cp1251, cp1252, and
2691
   *                              KOI8-R are effectively equivalent, provided the
2692
   *                              <i>string</i> itself is valid for the encoding, as
2693
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2694
   *                              the same positions in all of these encodings.
2695
   *                              </p>
2696
   * @param bool   $double_encode [optional] <p>
2697
   *                              When <i>double_encode</i> is turned off PHP will not
2698
   *                              encode existing html entities, the default is to convert everything.
2699
   *                              </p>
2700
   *
2701
   * @return string The converted string.
2702
   * </p>
2703
   * <p>
2704
   * If the input <i>string</i> contains an invalid code unit
2705
   * sequence within the given <i>encoding</i> an empty string
2706
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2707
   * <b>ENT_SUBSTITUTE</b> flags are set.
2708
   */
2709 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given; every other name is normalized first.
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2717
2718
  /**
   * Checks whether the "iconv" extension is available on the server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already returns a bool, so no ternary is needed.
    return \extension_loaded('iconv');
  }
2728
2729
  /**
   * Alias of "UTF8::decimal_to_chr()": forwards the argument unchanged.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>The value handed to decimal_to_chr().</p>
   *
   * @return string Whatever decimal_to_chr() returns for the given value.
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2742
2743
  /**
   * Converts an integer to its hexadecimal "U+xxxx" code point representation.
   *
   * The hex digits are lower case and left-padded with zeros to at least four
   * digits; longer values are kept as they are.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits, "U+" by default.</p>
   *
   * @return string The prefix followed by the zero-padded hex digits.
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // str_pad() only pads when the string is shorter than four characters,
    // which is exactly what the former manual substr() padding did.
    return $pfix . \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);
  }
2761
2762
  /**
   * Checks whether the "IntlChar" class is available on the server.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2772
2773
  /**
   * Checks whether the "intl" extension is available on the server.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    $isLoaded = \extension_loaded('intl');

    return $isLoaded;
  }
2783
2784
  /**
   * Deprecated alias of "UTF8::is_ascii()": forwards the argument unchanged.
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool The result of is_ascii() for the given string.
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2799
2800
  /**
   * Deprecated alias of "UTF8::is_base64()": forwards the argument unchanged.
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of is_base64() for the given value.
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str): bool
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2815
2816
  /**
   * Deprecated alias of "UTF8::is_binary()": forwards both arguments unchanged.
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str    <p>The input to check.</p>
   * @param bool  $strict <p>Passed through as the "$strict" argument of is_binary().</p>
   *
   * @return bool The result of is_binary() for the given input.
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    $isBinary = self::is_binary($str, $strict);

    return $isBinary;
  }
2832
2833
  /**
   * Deprecated alias of "UTF8::is_bom()": forwards the argument unchanged.
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The input string.</p>
   *
   * @return bool The result of is_bom() for the given string.
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2848
2849
  /**
   * Deprecated alias of "UTF8::is_html()": forwards the argument unchanged.
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of is_html() for the given string.
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2864
2865
  /**
   * Deprecated alias of "UTF8::is_json()": forwards the argument unchanged.
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of is_json() for the given string.
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    $isJson = self::is_json($str);

    return $isJson;
  }
2880
2881
  /**
   * Deprecated alias of "UTF8::is_utf16()": forwards the argument unchanged.
   *
   * @see        UTF8::is_utf16()
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
2899
2900
  /**
   * Deprecated alias of "UTF8::is_utf32()": forwards the argument unchanged.
   *
   * @see        UTF8::is_utf32()
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
2918
2919
  /**
   * Deprecated alias of "UTF8::is_utf8()": forwards both arguments unchanged.
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through as the "$strict" argument of is_utf8().</p>
   *
   * @return bool The result of is_utf8() for the given input.
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2935
2936
  /**
   * Tells whether the string consists of alphabetic chars only.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:alpha:]]" anchored at both ends.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphabetic chars.
   */
  public static function is_alpha(string $str): bool
  {
    $alphaOnlyPattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnlyPattern);
  }
2948
2949
  /**
   * Tells whether the string consists of alphabetic and numeric chars only.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:alnum:]]" anchored at both ends.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphanumeric chars.
   */
  public static function is_alphanumeric(string $str): bool
  {
    $alnumOnlyPattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnlyPattern);
  }
2961
2962
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * Accepted bytes are 0x20-0x7E plus the single bytes 0x09, 0x0A, 0x0D,
   * 0x10 and 0x13; any other byte makes the check fail.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if it is ASCII (the empty string included)<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // NOTE(review): \x10 and \x13 look like decimal 10 / 13 (LF / CR) written
    // as hex; both LF and CR are already listed as \x0A and \x0D. Kept as-is
    // because removing them changes the result for those two control bytes.
    $hasDisallowedByte = (bool)\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return $hasDisallowedByte === false;
  }
2980
2981
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input counts as base64 when it decodes in strict mode and encoding
   * the decoded bytes again reproduces the input exactly.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is base64 encoded; always false for the
   *              empty string and for non-string input.
   */
  public static function is_base64($str): bool
  {
    // The parameter is untyped (the deprecated isBase64() alias forwards
    // anything), so non-strings are rejected here together with ''.
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    // Strict mode makes base64_decode() return false on invalid characters.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare with "false" explicitly instead of relying on truthiness:
    // a decoded payload of "0" (input "MA==") is falsy but perfectly valid.
    return \base64_encode($decoded) === $str;
  }
3002
3003
  /**
   * Heuristically checks whether the input looks like binary data.
   *
   * The input is cast to string and run through these checks in order; the
   * first one that hits returns true:
   * - it consists only of the characters "0" and "1",
   * - (strict mode only) finfo reports the mime-encoding "binary",
   * - get_file_type() reports the type "binary",
   * - more than 25.6 % of its bytes are NUL bytes.
   *
   * @param mixed $input  <p>The value to check (cast to string).</p>
   * @param bool  $strict <p>Also consult ext-fileinfo.</p>
   *
   * @throws \RuntimeException if $strict is true and ext-fileinfo is not installed
   *
   * @return bool Always false for an empty input.
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // A string made up solely of "0" and "1" is treated as binary notation.
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detector = new \finfo(FILEINFO_MIME_ENCODING);
      if ($detector->buffer($data) === 'binary') {
        return true;
      }
    }

    $fileType = self::get_file_type($data);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($data);
    if (!$byteLength) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Share of NUL bytes in the whole input.
    $nullBytes = self::substr_count_in_byte($data, "\x0", 0, $byteLength);

    return ($nullBytes / $byteLength) > 0.256;
  }
3060
3061
  /**
   * Checks whether a file is binary by inspecting its first bytes.
   *
   * Up to 512 bytes are read from the start of the file and handed to
   * is_binary() in strict mode.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool False when nothing could be read; otherwise the is_binary() result.
   */
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');

    // Stays '' when the file could not be opened.
    $head = '';
    if (\is_resource($handle)) {
      $head = \fread($handle, 512);
      \fclose($handle);
    }

    return $head === '' ? false : self::is_binary($head, true);
  }
3085
3086
  /**
   * Tells whether the string consists of whitespace chars only.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:space:]]" anchored at both ends.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only whitespace characters.
   */
  public static function is_blank(string $str): bool
  {
    $whitespaceOnlyPattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $whitespaceOnlyPattern);
  }
3098
3099
  /**
   * Checks if the given string is exactly equal to one of the known "Byte Order Mark"s.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is identical to a key of the BOM table,
   *              <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    // The BOM table maps "bom string => byte length"; only the keys matter here.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3119
3120
  /**
   * Determines whether the given value is considered to be empty.
   *
   * A value counts as empty when it is loosely equal to FALSE, e.g. '', '0',
   * 0, null, false or an empty array.
   *
   * @param mixed $str <p>The value to check.</p>
   *
   * @return bool Whether or not $str is empty().
   */
  public static function is_empty($str): bool
  {
    // For a parameter (always defined) empty($str) is the same as !$str.
    return !$str;
  }
3134
3135
  /**
   * Tells whether the string consists of hexadecimal chars only.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:xdigit:]]" anchored at both ends.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only hexadecimal chars.
   */
  public static function is_hexadecimal(string $str): bool
  {
    $hexOnlyPattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnlyPattern);
  }
3147
3148
  /**
   * Checks if the string contains at least one html-tag, e.g. "<lall>".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool True when an opening, closing or self-closing tag
   *              (optionally with attributes) is found; false for ''.
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // preg_match() yields 1 only when a tag was found (0 = no match, false = error).
    $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
3168
3169
  /**
   * Tries to check whether "$str" is a json-string.
   *
   * Only input that decodes to an object or an array without a json error is
   * accepted; the empty string is rejected up front.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException if ext-json is not installed
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Scalars and null are not accepted as json here.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3201
3202
  /**
   * Tells whether the string consists of lower case chars only.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:lower:]]" anchored at both ends.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only lower case characters.
   */
  public static function is_lowercase(string $str): bool
  {
    $lowerOnlyPattern = '^[[:lower:]]*$';

    return self::str_matches_pattern($str, $lowerOnlyPattern) ? true : false;
  }
3215
3216
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The string is handed to unserialize() with object instantiation disabled,
   * so serialized objects are still recognized but none of their classes are
   * loaded or instantiated while probing.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized; always false for ''.
   */
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // A serialized "false" unserializes to false, which is indistinguishable
    // from the failure result below, so it is matched literally.
    if ($str === 'b:0;') {
      return true;
    }

    // "allowed_classes => false" turns every object into __PHP_Incomplete_Class,
    // so probing an untrusted string cannot trigger __wakeup()/__destruct().
    // The silence operator hides the notice unserialize() raises on bad input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3235
3236
  /**
   * Tells whether the string consists of upper case chars only.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:upper:]]" anchored at both ends.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    $upperOnlyPattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $upperOnlyPattern);
  }
3249
3250
  /**
   * Check if the string is UTF-16.
   *
   * Heuristic: the input must first pass is_binary(). After the BOM is removed
   * it is converted to UTF-8 once as UTF-16LE and once as UTF-16BE. For each
   * byte order whose conversion survives a round trip, the chars of the
   * converted string are scored against the char statistics of the input.
   * The byte order with the higher score wins; equal scores mean "not UTF-16".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    // init
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before any support check has run (e.g. for
    // input made of "0"/"1" only), so make sure the support table is filled
    // before it is read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Char statistics of the input, computed lazily on first use.
    $strChars = [];

    $scores = [];
    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Only a conversion that survives a full round trip is scored.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() compares each char key of the converted
      // string against the *values* of $strChars; presumably those are
      // occurrence counts - confirm against count_chars() before changing.
      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (\in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3323
  /**
3324
   * Check if the string is UTF-32.
3325
   *
3326
   * @param mixed $str
3327
   *
3328
   * @return int|false
3329
   *                   <strong>false</strong> if is't not UTF-32,<br>
3330
   *                   <strong>1</strong> for UTF-32LE,<br>
3331
   *                   <strong>2</strong> for UTF-32BE.
3332
   */
3333 17
  public static function is_utf32($str)
3334
  {
3335
    // init
3336 17
    $str = (string)$str;
3337
3338 17
    if (self::is_binary($str) === false) {
3339 9
      return false;
3340
    }
3341
3342 12
    if (self::$SUPPORT['mbstring'] === false) {
3343
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
3344
    }
3345
3346
    // init
3347 12
    $strChars = [];
3348
3349 12
    $str = self::remove_bom($str);
3350
3351 12
    $maybeUTF32LE = 0;
3352 12
    $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
3353 12
    if ($test) {
3354 10
      $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
3355 10
      $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
3356 10
      if ($test3 === $test) {
3357 10
        if (\count($strChars) === 0) {
3358 10
          $strChars = self::count_chars($str, true);
3359
        }
3360 10
        foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
3361 10
          if (\in_array($test3char, $strChars, true) === true) {
3362 10
            $maybeUTF32LE++;
3363
          }
3364
        }
3365
      }
3366
    }
3367
3368 12
    $maybeUTF32BE = 0;
3369 12
    $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
3370 12
    if ($test) {
3371 10
      $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
3372 10
      $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
3373 10
      if ($test3 === $test) {
3374 10
        if (\count($strChars) === 0) {
3375 6
          $strChars = self::count_chars($str, true);
3376
        }
3377 10
        foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
3378 10
          if (\in_array($test3char, $strChars, true) === true) {
3379 10
            $maybeUTF32BE++;
3380
          }
3381
        }
3382
      }
3383
    }
3384
3385 12
    if ($maybeUTF32BE !== $maybeUTF32LE) {
3386 2
      if ($maybeUTF32LE > $maybeUTF32BE) {
3387 2
        return 1;
3388
      }
3389
3390 2
      return 2;
3391
    }
3392
3393 12
    return false;
3394
  }
3395
3396
  /**
3397
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3398
   *
3399
   * @see    http://hsivonen.iki.fi/php-utf8/
3400
   *
3401
   * @param string|string[] $str    <p>The string to be checked.</p>
3402
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3403
   *
3404
   * @return bool
3405
   */
3406 107
  public static function is_utf8($str, bool $strict = false): bool
3407
  {
3408 107
    if (\is_array($str) === true) {
3409 2
      foreach ($str as $k => $v) {
3410 2
        if (false === self::is_utf8($v, $strict)) {
3411 2
          return false;
3412
        }
3413
      }
3414
3415
      return true;
3416
    }
3417
3418 107
    if ('' === $str) {
3419 12
      return true;
3420
    }
3421
3422 103
    if ($strict === true) {
3423 2
      if (self::is_utf16($str) !== false) {
3424 2
        return false;
3425
      }
3426
3427
      if (self::is_utf32($str) !== false) {
3428
        return false;
3429
      }
3430
    }
3431
3432 103
    if (self::pcre_utf8_support() !== true) {
3433
3434
      // If even just the first character can be matched, when the /u
3435
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3436
      // invalid, nothing at all will match, even if the string contains
3437
      // some valid sequences
3438
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3439
    }
3440
3441 103
    $mState = 0; // cached expected number of octets after the current octet
3442
    // until the beginning of the next UTF8 character sequence
3443 103
    $mUcs4 = 0; // cached Unicode character
3444 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3445
3446 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3447
      self::checkForSupport();
3448
    }
3449
3450 103
    if (self::$ORD === null) {
3451
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3452
    }
3453
3454 103
    $len = self::strlen_in_byte((string)$str);
3455
    /** @noinspection ForeachInvariantsInspection */
3456 103
    for ($i = 0; $i < $len; $i++) {
3457 103
      $in = self::$ORD[$str[$i]];
3458 103
      if ($mState === 0) {
3459
        // When mState is zero we expect either a US-ASCII character or a
3460
        // multi-octet sequence.
3461 103
        if (0 === (0x80 & $in)) {
3462
          // US-ASCII, pass straight through.
3463 98
          $mBytes = 1;
3464 84
        } elseif (0xC0 === (0xE0 & $in)) {
3465
          // First octet of 2 octet sequence.
3466 75
          $mUcs4 = $in;
3467 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3468 75
          $mState = 1;
3469 75
          $mBytes = 2;
3470 58
        } elseif (0xE0 === (0xF0 & $in)) {
3471
          // First octet of 3 octet sequence.
3472 41
          $mUcs4 = $in;
3473 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3474 41
          $mState = 2;
3475 41
          $mBytes = 3;
3476 30
        } elseif (0xF0 === (0xF8 & $in)) {
3477
          // First octet of 4 octet sequence.
3478 19
          $mUcs4 = $in;
3479 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3480 19
          $mState = 3;
3481 19
          $mBytes = 4;
3482 13
        } elseif (0xF8 === (0xFC & $in)) {
3483
          /* First octet of 5 octet sequence.
3484
          *
3485
          * This is illegal because the encoded codepoint must be either
3486
          * (a) not the shortest form or
3487
          * (b) outside the Unicode range of 0-0x10FFFF.
3488
          * Rather than trying to resynchronize, we will carry on until the end
3489
          * of the sequence and let the later error handling code catch it.
3490
          */
3491 5
          $mUcs4 = $in;
3492 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3493 5
          $mState = 4;
3494 5
          $mBytes = 5;
3495 10
        } elseif (0xFC === (0xFE & $in)) {
3496
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3497 5
          $mUcs4 = $in;
3498 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3499 5
          $mState = 5;
3500 5
          $mBytes = 6;
3501
        } else {
3502
          // Current octet is neither in the US-ASCII range nor a legal first
3503
          // octet of a multi-octet sequence.
3504 103
          return false;
3505
        }
3506
      } else {
3507
        // When mState is non-zero, we expect a continuation of the multi-octet
3508
        // sequence
3509 84
        if (0x80 === (0xC0 & $in)) {
3510
          // Legal continuation.
3511 76
          $shift = ($mState - 1) * 6;
3512 76
          $tmp = $in;
3513 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3514 76
          $mUcs4 |= $tmp;
3515
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3516
          // Unicode code point to be output.
3517 76
          if (0 === --$mState) {
3518
            // Check for illegal sequences and code points.
3519
            //
3520
            // From Unicode 3.1, non-shortest form is illegal
3521
            if (
3522 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3523
                ||
3524 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3525
                ||
3526 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3527
                ||
3528 76
                (4 < $mBytes)
3529
                ||
3530
                // From Unicode 3.2, surrogate characters are illegal.
3531 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3532
                ||
3533
                // Code points outside the Unicode range are illegal.
3534 76
                ($mUcs4 > 0x10FFFF)
3535
            ) {
3536 8
              return false;
3537
            }
3538
            // initialize UTF8 cache
3539 76
            $mState = 0;
3540 76
            $mUcs4 = 0;
3541 76
            $mBytes = 1;
3542
          }
3543
        } else {
3544
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3545
          // Incomplete multi-octet sequence.
3546 36
          return false;
3547
        }
3548
      }
3549
    }
3550
3551 67
    return true;
3552
  }
3553
3554
  /**
   * Decodes a JSON string.
   *
   * The input is first passed through "UTF8::filter()" and is then handed to the
   * native "json_decode()" function.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded.
   *                        This function only works with UTF-8 encoded strings.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, forwarded unchanged to the native function.
   *                        </p>
   *
   * @throws \RuntimeException if the support check reports that ext-json is not installed
   *
   * @return mixed
   *                The value encoded in <i>json</i> in appropriate PHP type.
   *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
   *                is deeper than the recursion limit.
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // make sure the feature detection has been executed before reading its result
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3606
3607
  /**
   * Returns the JSON representation of a value.
   *
   * The value is first passed through "UTF8::filter()" and is then handed to the
   * native "json_encode()" function.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource. All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode options (e.g. <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>),
   *                       forwarded unchanged to the native function.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @throws \RuntimeException if the support check reports that ext-json is not installed
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filteredValue = self::filter($value);

    // make sure the feature detection has been executed before reading its result
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3662
3663
  /**
   * Checks whether JSON is available on the server.
   *
   * The check is done by testing whether the native "json_decode()" function exists.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3673
3674
  /**
   * Makes string's first char lowercase.
   *
   * The string is split into its first character and the remainder; only the
   * first character is lower-cased, the remainder is appended unchanged.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character ("false" from substr() means: there is no remainder)
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharLower = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $firstCharLower . ($remainder === false ? '' : $remainder);
  }
3698
3699
  /**
   * alias for "UTF8::lcfirst()"
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   *
   * @return string
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3714
3715
  /**
   * Lowercase the first character of all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty part that is
   * not listed in $exceptions is passed to "UTF8::lcfirst()" and the parts are
   * glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" check would also discard the valid input "0".
    if ('' === $str) {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip only really empty parts; the part "0" is falsy in PHP but must stay in the result.
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3764
3765
  /**
   * alias for "UTF8::lcfirst()"
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   *
   * @return string
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3780
3781
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without a character list, all leading characters of the Unicode categories
   * "Z" (separators) and "C" (other / control) are removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. INF (the default) or an empty value
   *                      selects the default pattern; the string "0" is treated as a regular
   *                      character list.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a legitimate character list (e.g. stripping leading zeros),
    // so it must not fall back to the default pattern like the other empty values do.
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = '^[\pZ\pC]+';
    } else {
      // quote the chars, so that they are taken literally inside the character class
      $pattern = '^[' . \preg_quote($chars, '/') . ']+';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3805
3806
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array input is concatenated into one string before the code points are collected.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, returns null when no code point was found.
   */
  public static function max($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) < 1) {
      return null;
    }

    return self::chr(\max($codepoints));
  }
3828
3829
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int Max byte length of the given chars, 0 if "UTF8::chr_size_list()" yields no entries.
   */
  public static function max_chr_width(string $str): int
  {
    $byteLengths = self::chr_size_list($str);

    if (\count($byteLengths) === 0) {
      return 0;
    }

    return (int)\max($byteLengths);
  }
3846
3847
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal mbstring encoding is set to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3863
3864
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * The result is true only if the constant "MB_OVERLOAD_STRING" is defined and
   * its bit is set in the INI value "mbstring.func_overload".
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // without the constant there is nothing to test against
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3881
3882
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array input is concatenated into one string before the code points are collected.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, returns null when no code point was found.
   */
  public static function min($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) < 1) {
      return null;
    }

    return self::chr(\min($codepoints));
  }
3904
3905
  /**
   * alias for "UTF8::normalize_encoding()"
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>Forwarded to "UTF8::normalize_encoding()".</p>
   * @param mixed $fallback <p>Forwarded to "UTF8::normalize_encoding()".</p>
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3921
3922
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input (fallback) -> well known names ("UTF-8", "UTF8", "8BIT", "BINARY")
   * -> per-request cache -> list of known encodings -> table of aliases
   * (compared upper-cased and without punctuation).
   * An unknown name is returned upper-cased.
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (
        '8BIT' === $encoding
        ||
        'BINARY' === $encoding
    ) {
      return 'CP850';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // Static analysis reports that getData() can return false: keep the property an array
      // (as declared), so that it is never used as a non-array haystack below.
      $encodingsData = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodingsData) ? $encodingsData : [];
    }

    if (
        \is_array(self::$ENCODINGS)
        &&
        \in_array($encoding, self::$ENCODINGS, true)
    ) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // preg_replace() returns null on error, the cast keeps the array key below a string
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache under the name the caller used, so the next lookup with the same input is a cache hit
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
4055
4056
  /**
   * Standardize line ending to unix-like.
   *
   * Every "\r\n" and every remaining single "\r" is replaced by "\n".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function normalize_line_ending(string $str): string
  {
    // order matters: "\r\n" must be handled before the single "\r"
    $foreignLineEndings = ["\r\n", "\r"];

    return (string)\str_replace($foreignLineEndings, "\n", $str);
  }
4067
4068
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement map is loaded via "getData('utf8_msword')"; its keys are
   * replaced by its values. Keys and values are cached in static variables, so the
   * map is only split once per request.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // Static analysis reports that getData() can return false: keep the property an array
        // (as declared), so that array_keys() / array_values() never receive a non-array.
        $mswordData = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($mswordData) ? $mswordData : [];
      }

      $replacementMap = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacementMap);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacementMap);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4096
4097
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain
   * space; unless told otherwise, every entry of self::$BIDI_UNI_CODE_CONTROLS_TABLE
   * is removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ($str === '') {
      return '';
    }

    // one search list per value of $keepNonBreakingSpace (key: 0 or 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4139
4140
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per "input + encoding" in a static cache. When the intl
   * extension is flagged as available, \IntlChar::ord() is tried first; otherwise
   * (or if it yields a falsy value) the first up to four bytes are decoded by hand.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 when no byte could be read (e.g. empty input).
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache key is built from the untouched input, not from the re-encoded one
    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // only re-encode if the normalized name is still something else than UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CHAR_CACHE[$cacheKey] = $code;
      }
    }

    // manual decoding: unpack() yields a 1-based list of byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four-byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three-byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two-byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
      // single byte (or nothing readable at all)
      $codePoint = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $codePoint;
  }
4209
4210
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never creates variables in the
   *          current scope; the result is always written to the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] !== true) {
      // no mbstring: fall back to the native parser
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4246
4247
  /**
   * Checks if the "u" pattern modifier is available, i.e. PCRE was built with Unicode support.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // the probe emits a warning and returns false when "u" is unsupported, hence the "@"
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return $matched !== false && $matched !== 0;
  }
4259
4260
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a string of decimal digits is taken as
   * a decimal code point, a string of hexadecimal digits is converted via
   * self::hex_to_int(), anything else is passed to self::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if the ctype extension is flagged as not available
   *
   * @return string[] Empty if a boundary is falsy or resolves to code point 0.
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Shared boundary conversion. Both ctype checks get a string on purpose:
    // ctype_*() treats small ints as ASCII codes (deprecated since PHP 8.1).
    $toCodePoint = static function ($value): int {
      $asString = (string)$value;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($asString)) {
        return (int)$value;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($asString)) {
        return (int)self::hex_to_int($asString);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4316
4317
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are turned into hex entities, which the loop below decodes
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one pass = to_utf8 -> entity decode -> url decode -> utf8 fix;
    // with $multi_decode the pass is repeated until the string stops changing
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4365
4366
  /**
   * Filter a list of strings and return the remaining ones as a re-indexed list.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values that are empty after trim().</p>
   * @param int   $removeShortValues <p>If not null: drop values whose length (via self::strlen())
   *                                 is less than or equal to this number.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $value) {
      // the length check runs first; the blank check is only reached if it did not hit
      $isTooShort = $removeShortValues !== null && self::strlen($value) <= $removeShortValues;

      if ($isTooShort || ($removeEmptyValues === true && \trim($value) === '')) {
        continue;
      }

      $kept[] = $value;
    }

    return $kept;
  }
4400
4401
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The "u" modifier is always added in front of $options.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used. The value "msr" is mapped to "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

    // a falsy delimiter falls back to the default one
    $boundary = $delimiter ?: '/';

    return (string)\preg_replace($boundary . $pattern . $boundary . $modifiers, $replacement, $str);
  }
4431
4432
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string Whatever "UTF8::remove_bom()" returns for $str.
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4447
4448
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM is checked in turn against the start of the
   * (possibly already shortened) string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bom => $bomLength) {
      if (self::strpos_in_byte($str, $bom, 0) !== 0) {
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomLength, $remainingBytes);
      if ($stripped === false) {
        // nothing left behind the BOM
        return '';
      }

      $remainingBytes -= $bomLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4477
4478
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "   ") is collapsed into a single $what.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle cannot be duplicated
        if ($item === '') {
          continue;
        }

        // The callback inserts $item literally. As a plain preg_replace() replacement,
        // an item such as '$0' or '\0' would be expanded as a back-reference.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4501
4502
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
   *                              Default: '' (strip every tag)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $stripped = \strip_tags($str, $allowableTags);

    return $stripped;
  }
4516
4517
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" must be the first alternative so a CRLF pair is consumed as one match;
    // "U" makes ".*" lazy, so a "<br ...>" match ends at the first ">"
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4529
4530
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * NOTE: the replacement is repeated until nothing matches anymore, so a $replacement
   *       that itself matches one of the patterns would never let the loop terminate.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%00") of these characters.</p>
   * @param string $replacement [optional] <p>The string to insert instead of each match.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoded hex digits are case-insensitive ("%0b" === "%0B"), hence the "i" flag
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // a removal can create a new match (e.g. "%%0000" -> "%00"), so loop until stable
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4563
4564
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // self::strlen() may return false; the cast keeps the offset an int
    $prefixLength = (int)self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4587
4588
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // self::strlen() may return false; the casts keep the arithmetic on ints
    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // the lengths were measured in $encoding, so the cut has to use it as well
    return (string)self::substr($str, 0, $keepLength, $encoding);
  }
4610
4611
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to self::str_replace() or, for case-insensitive matching, to self::str_ireplace().
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4629
4630
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * Delegates to self::str_replace() or, for case-insensitive matching, to self::str_ireplace().
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4648
4649
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement; an empty string removes the characters.</p>
   * @param bool   $processInvalidUtf8 <p>Also handle invalid UTF-8 byte sequences, not only U+FFFD itself.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // mb_substitute_character() only accepts "none" / "long" / "entity" or an int code point,
      // never an arbitrary string. So invalid sequences are either dropped ("none") or turned
      // into U+FFFD, which the final str_replace() maps to $replacementChar.
      $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4705
4706
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars, every trailing character of the Unicode categories "Z" (separators)
   * and "C" (control / other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a legitimate character to strip, so it must not count as "no chars given"
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "\z" anchors at the very end of the subject; "$" would also match in front of a
    // trailing newline and strip characters that are not at the end.
    if ($useDefaultChars) {
      $pattern = '[\pZ\pC]+\z';
    } else {
      $pattern = '[' . \preg_quote((string)$chars, '/') . ']+\z';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4730
4731
  /**
   * Build a regex fragment that matches any one of the characters in $s.
   *
   * Results are memoized per "$s + $class" in a static cache.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Raw content that is put at the beginning of the character class.</p>
   *
   * @return string A "[...]" character class, or a "(?:...|...)" group if alternatives were needed.
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 collects the character class body, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $piece) {
      if ($piece === '-') {
        // a dash goes to the front of the class
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($piece[2])) {
        // at most two bytes: append quoted
        $parts[0] .= \preg_quote($piece, '/');
      } elseif (self::strlen($piece) === 1) {
        // three or more bytes, but a single character: append as-is
        $parts[0] .= $piece;
      } else {
        $parts[] = $piece;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = \count($parts) === 1
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4779
4780
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of self::$SUPPORT as "key - value" inside a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '<pre>';
    foreach (self::$SUPPORT as $key => $value) {
      $report .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }
    $report .= '</pre>';

    echo $report;
  }
4795
4796
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched, but only on request
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4825
4826
  /**
   * Replace every run of $tabLength spaces by one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4836
4837
  /**
   * Convert a string to an array of Unicode characters.
   *
   * For array input every element is split on its own (with the same $length and
   * $cleanUtf8), so the result is an array of arrays with the original keys.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string; empty if $length <= 0 or the string is empty.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // hand $cleanUtf8 down, so array elements are treated like a plain string argument
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "u" = match by UTF-8 character, "s" = "." also matches newlines
      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the bytes and assemble 1-4 byte sequences by hand;
      // bytes that do not fit any of the patterns below are skipped

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte (ASCII)
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: lead byte of a two-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: lead byte of a three-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx: lead byte of a four-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue every $length characters back together
      $ret = \array_chunk($ret, $length);

      return \array_map(
          static function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
4969
4970
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $trimmed = self::trim($str);
    $camel = self::lcfirst($trimmed, $encoding);

    // leading dashes / underscores are dropped without capitalizing what follows
    $camel = (string)\preg_replace('/^[-_]+/', '', $camel);

    // separators vanish, the character right behind them is upper-cased
    $camel = (string)\preg_replace_callback(
        '/[-_\s]+(.)?/u',
        static function ($match) use ($encoding) {
          return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
        },
        $camel
    );

    // digits stay, the character right behind them is upper-cased
    $camel = (string)\preg_replace_callback(
        '/[\d]+(.)?/u',
        static function ($match) use ($encoding) {
          return UTF8::strtoupper($match[0], $encoding);
        },
        $camel
    );

    return $camel;
  }
5007
5008
  /**
   * Capitalizes the first letter of each word of a name, leaving alone the
   * words and prefixes that the name helper treats as special cases.
   *
   * Whitespace is collapsed first; the name is then processed once with a
   * space as delimiter and once with a dash as delimiter.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);
    $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
5025
5026
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names at $delimiter and upper-cases the first character of every
   * part, except for parts that
   * - are exactly one of the known lowercase name particles (e.g. "van", "de"),
   * - start with one of the known prefixes (e.g. "mc", "d'"), or
   * - (only when the delimiter is "-") start with one of the name particles.
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The delimiter used to split and re-join the parts.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // init
    $namesArray = \explode($delimiter, $names);

    if ($namesArray === false) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // The list of beginnings that block capitalization does not depend on the
    // current name, so it is built once: for dash-separated parts the name
    // particles count as prefixes too.
    $beginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $beginnings = \array_merge($specialCases['names'], $beginnings);
    }

    foreach ($namesArray as &$name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      foreach ($beginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          // one matching beginning is enough, skip this name
          continue 2;
        }
      }

      $name = self::str_upper_first($name);
    }
    // break the reference so later writes to $name cannot touch the array
    unset($name);

    return \implode($delimiter, $namesArray);
  }
5114
5115
  /**
   * Checks whether $haystack contains $needle.
   *
   * The comparison is case-sensitive unless $caseSensitive is false. An empty
   * haystack or an empty needle always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5144
5145
  /**
   * Checks whether $haystack contains every entry of $needles.
   *
   * The comparison is case-sensitive unless $caseSensitive is false. An empty
   * haystack or an empty list of needles yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if (!$found) {
        // the first missing needle settles the result
        return false;
      }
    }

    return true;
  }
5180
5181
  /**
   * Checks whether $haystack contains at least one entry of $needles.
   *
   * The comparison is case-sensitive unless $caseSensitive is false. An empty
   * list of needles yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (empty($needles)) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found) {
        // the first hit settles the result
        return true;
      }
    }

    return false;
  }
5208
5209
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * Thin wrapper around str_delimit() with "-" as delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5223
5224
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   *
   * A marker is inserted before every ASCII uppercase letter (A-Z) that is not
   * at a word boundary, the string is lower-cased, and every run of dashes,
   * underscores and whitespace is then replaced by the delimiter. The delimiter
   * is inserted after lower-casing, so alpha delimiters keep their case.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // A callback is used so that the delimiter is inserted literally: as a plain
    // preg_replace() replacement, sequences such as "$0" or "\1" inside the
    // delimiter would be expanded as backreferences.
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function () use ($delimiter) {
          return $delimiter;
        },
        $str
    );
  }
5246
5247
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order:
   * 1. strings reported as binary are checked for UTF-16 / UTF-32, anything else binary yields false
   * 2. ASCII
   * 3. UTF-8
   * 4. "mb_detect_encoding()" with a fixed candidate list (only if mbstring is available)
   * 5. an "iconv()" round-trip against every known encoding
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // evaluate each check only once: 1 => little endian, 2 => big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // only cache the list when it could actually be loaded, so that the
      // property never holds a non-array value
      $encodingsTmp = self::getData('encodings');
      if (\is_array($encodingsTmp)) {
        self::$ENCODINGS = $encodingsTmp;
      }
    }

    foreach (self::$ENCODINGS ?? [] as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5373
5374
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is byte-wise and case-sensitive. An empty haystack or an
   * empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many trailing bytes as the needle is long and compare them
    $needleBytes = \strlen($needle);
    $tail = \substr($haystack, -$needleBytes);

    return $tail === $needle;
  }
5390
5391
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5415
5416
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
5433
5434
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
5450
5451
  /**
   * Capitalizes the first word of the string, replaces underscores with
   * spaces, and strips '_id'.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // '_id' is listed before '_' so it is removed before underscores become spaces
    $search = ['_id', '_'];
    $replacement = ['', ' '];

    $str = self::str_replace($search, $replacement, $str);
    $trimmed = self::trim($str);

    return self::ucfirst($trimmed);
  }
5475
5476
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields false. The tail of the
   * haystack is cut by the byte length of the needle before it is compared.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $needleBytes = \strlen($needle);
    $tail = \substr($haystack, -$needleBytes);

    return self::strcasecmp($tail, $needle) === 0;
  }
5496
5497
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5521
5522
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper that forwards all arguments to stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5544
5545
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper that forwards all arguments to strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5568
5569
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper that forwards all arguments to strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5591
5592
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper that forwards all arguments to strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5615
5616
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * If $index is greater than the length of the string, the string is
   * returned unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $charCount = self::strlen($str, $encoding);

    if ($index > $charCount) {
      return $str;
    }

    // everything before the insertion point, the new part, then the remainder
    return self::substr($str, 0, $index, $encoding)
           . $substring
           . self::substr($str, $index, $charCount, $encoding);
  }
5639
5640
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive unicode
   * pattern; an empty search term becomes a pattern that never matches.
   * Replacement values are inserted literally: "$" and "\" are escaped so that
   * sequences like "$1" or "\1" are not expanded as backreferences.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (string or array of strings).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements
   *               (whatever "preg_replace()" returns, i.e. null on a PCRE error).
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // "start of string preceded by a character" can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($term, '/') . '/ui';
      }
    }

    // neutralize backreference syntax in the replacement value(s)
    $escape = static function ($value): string {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $replacements = 0;
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $replacements);
    $count = $replacements;

    return $subject;
  }
5683
5684
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first occurrence is at index 0
    return self::stripos($haystack, $needle) === 0;
  }
5704
5705
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5733
5734
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string if the input or the separator is empty, or if the
   * separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the length / substring
    // calculation below, otherwise the offset may not line up
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5765
5766
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string if the input or the separator is empty, or if the
   * separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the length / substring
    // calculation below, otherwise the offset may not line up
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5797
5798
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string if the input or the separator is empty, or if the
   * separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting the substring
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5824
5825
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * Returns an empty string if the input or the separator is empty, or if the
   * separator does not occur.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for cutting the substring
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5851
5852
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to stristr(); an empty input, an empty needle or a failed
   * lookup yields an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5884
5885
  /**
   * Gets the substring after (or, with "$beforeNeedle" set to true, before) the last
   * occurrence of "$needle". The lookup itself is delegated to "UTF8::strrichr()".
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>
   *                The result of "UTF8::strrichr()", or an empty string if "$str" or "$needle"
   *                is empty or "UTF8::strrichr()" returned false.
   *                </p>
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    // Normalize the "false" failure value to an empty string.
    return false === $found ? '' : $found;
  }
5912
5913
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end. Default: 1</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>
   *                The trailing characters, or an empty string if "$n" is not positive
   *                or "UTF8::substr()" returned false.
   *                </p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      // A negative start offset counts from the end of the string.
      $tail = self::substr($str, -$n, null, $encoding);

      if (false !== $tail) {
        return $tail;
      }
    }

    return '';
  }
5932
5933
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than "$length", it is cut so that the kept part plus
   * "$strAddOn" fits into "$length" characters. If "$strAddOn" alone is already as long as
   * (or longer than) "$length", only "$strAddOn" is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>
   *                The unchanged string if it is short enough, an empty string for an empty
   *                input or a non-positive "$length", otherwise the shortened string
   *                followed by "$strAddOn".
   *                </p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the input, and never let the remaining
    // budget drop below zero: a negative length would make "substr()" cut from the end of
    // the string instead of limiting it.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    // "substr()" may return false; the cast keeps the concatenation type-safe.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5959
5960
  /**
   * Limit the number of characters in a string without cutting the last word in half:
   * the string is cut at "$length" and the (possibly incomplete) last word is dropped.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>
   *                The unchanged string if it is short enough, an empty string for an empty
   *                input or a non-positive "$length", otherwise the shortened string
   *                followed by "$strAddOn".
   *                </p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The limit falls exactly on a space: cut right before it.
    if (' ' === self::substr($str, $length - 1, 1, $encoding)) {
      return (string)self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // Cut at the limit and drop the last space-separated chunk.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ('' !== $withoutLastWord) {
      return $withoutLastWord . $strAddOn;
    }

    // Nothing is left after dropping the last chunk: fall back to a hard cut.
    return (string)self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
6001
6002
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared leading characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the length of the shorter string has to be inspected.
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; ++$i) {
      $char = self::substr($str, $i, 1, $encoding);

      // Strict comparison: a failed "substr()" (false) must never count as a match,
      // which a loose "==" would allow (e.g. false == "0").
      if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
6028
6029
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if there is none.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    $len = 0; // length of the best match found so far
    $end = 0; // (1-based) position in $str where the best match ends

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // every cell starts at 0, so only matching cells have to be written.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; ++$i) {
      // The character of $str does not depend on $j, so it is fetched once per row.
      $strChar = self::substr($str, $i - 1, 1, $encoding);
      if ($strChar === false) {
        continue;
      }

      for ($j = 1; $j <= $otherLength; ++$j) {
        if ($strChar !== self::substr($otherStr, $j - 1, 1, $encoding)) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6080
6081
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared trailing characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the length of the shorter string has to be inspected.
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; ++$i) {
      // A negative offset walks backwards from the end of both strings.
      $char = self::substr($str, -$i, 1, $encoding);

      // Strict comparison: a failed "substr()" (false) must never count as a match,
      // which a loose "==" would allow (e.g. false == "0").
      if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
6107
6108
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // "preg_match()" yields 1 on a match, 0 on no match and false on error.
    $matchResult = \preg_match('/' . $pattern . '/u', $str);

    return $matchResult === 1;
  }
6124
6125
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // Non-negative offsets must be below the length; negative offsets may reach back
    // at most "length" characters.
    return $offset >= 0
        ? ($length > $offset)
        : ($length >= \abs($offset));
  }
6148
6149
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Throws an OutOfBoundsException
   * if the index does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // The length must be measured in the same encoding that "char_at()" uses below,
    // otherwise the bounds check and the lookup can disagree.
    $length = (int)self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6178
6179
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The string is returned unchanged if "$pad_length" is not positive, is smaller than the
   * length of "$str", or if "$pad_string" is empty. An empty "$str" yields an empty string.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string.
   *
   * @throws \InvalidArgumentException If "$pad_type" is neither an integer nor one of 'left', 'right', 'both'.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the textual pad types; strict comparison so that values such as
    // true or 0.0 are rejected instead of being treated as 'left'.
    if (!\is_int($pad_type)) {
      if ($pad_type === 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type === 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type === 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = (int)self::strlen($str, $encoding);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // Nothing to pad with; also prevents the division by zero below.
      return $str;
    }

    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the smaller half, the right side the larger one.
        $repeat = (int)\ceil($diff / $ps_length / 2);
        $pre = (string)self::substr(\str_repeat($pad_string, $repeat), 0, (int)\floor($diff / 2), $encoding);
        $post = (string)self::substr(\str_repeat($pad_string, $repeat), 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
6250
6251
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. The missing characters are split between the left side
   * (rounded down) and the right side (rounded up) and applied via "UTF8::apply_padding()".
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6268
6269
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. The padding is applied via "UTF8::apply_padding()".
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the requested encoding (as "str_pad_both()" does), so the
    // amount of padding matches the encoding that is used to apply it.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6284
6285
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. The padding is applied via "UTF8::apply_padding()".
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the requested encoding (as "str_pad_both()" does), so the
    // amount of padding matches the encoding that is used to apply it.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6300
6301
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6325
6326
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // "$count" is handed through by reference, so the caller sees the number of replacements.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6359
6360
  /**
   * Replaces "$search" with "$replacement" where it occurs at the beginning of the string.
   *
   * "$search" is quoted and anchored with "^"; backslashes in "$replacement" are doubled
   * before both are handed to "UTF8::regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = '^' . \preg_quote($search, '/');
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6377
6378
  /**
   * Replaces "$search" with "$replacement" where it occurs at the end of the string.
   *
   * "$search" is quoted and anchored with "$"; backslashes in "$replacement" are doubled
   * before both are handed to "UTF8::regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = \preg_quote($search, '/') . '$';
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6395
6396
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The changed string, or "$subject" unchanged if "$search" was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    // Not found -> nothing to replace.
    if (false === $position) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6414
6415
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The changed string, or "$subject" unchanged if "$search" was not found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $position = self::strrpos($subject, $search);

    // Not found -> nothing to replace.
    if (false === $position) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6433
6434
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    $length = (int)self::strlen($str);

    // Without this guard "range(0, -1)" would produce the indexes [0, -1] for an
    // empty string instead of an empty list.
    if ($length < 1) {
      return '';
    }

    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // Rebuild the string character by character in the shuffled order.
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6456
6457
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring, an empty string if a non-negative <i>end</i>
   *                     is not greater than <i>start</i>, or whatever "UTF8::substr()" returns
   *                     for the computed range (which may be <b>FALSE</b>).</p>
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // The length is always measured in "$encoding", the same encoding that is used
    // for the extraction below.
    if ($end === null) {
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6486
6487
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become "_", digits are wrapped in "_", upper-case ASCII letters
   * are lower-cased and prefixed with "_"; repeated, leading and trailing "_" are removed.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // No "|" inside the character class: there it would be a literal pipe, not an
        // alternation, and pipes in the input would wrongly get a "_" prefix.
        '/([\dA-Z])/u',
        static function (array $matches) use ($encoding): string {
          $match = $matches[1];

          // A digit survives the int round-trip unchanged -> separate it on both sides.
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    // The patterns are applied one after another; once all whitespace runs are "_",
    // no leading/trailing whitespace is left that would need a separate pattern.
    $str = (string)\preg_replace(
        [
            '/\s+/', // convert spaces to "_"
            '/_+/',  // remove double "_"
        ],
        [
            '_',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6534
6535
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice collapses duplicate values.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6560
6561
  /**
   * Alias for "UTF8::split()": all arguments are passed through unchanged.
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str
   * @param int             $len
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    $chunks = self::split($str, $len);

    return $chunks;
  }
6575
6576
  /**
   * Splits the string at every occurrence of "$pattern" and returns the parts as an
   * array of strings. An optional integer $limit will truncate the results.
   *
   * The pattern is passed through "preg_quote()" before it is used, i.e. it is
   * matched literally.
   *
   * @param string $str
   * @param string $pattern <p>The separator with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if (0 === $limit) {
      return [];
    }

    // An empty pattern is not handed to the regex engine; the string is returned as-is.
    if ('' === $pattern) {
      return [$str];
    }

    // "preg_split()" puts the remaining unsplit string into the last element when a
    // limit is given, so ask for one extra element and drop it again afterwards.
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
    if (false === $parts) {
      return [];
    }

    if ($splitLimit > 0 && \count($parts) === $splitLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6619
6620
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    return '' !== $haystack
           &&
           '' !== $needle
           &&
           \strpos($haystack, $needle) === 0;
  }
6640
6641
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing to compare against.
    if ('' === $str || \count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6669
6670
  /**
   * Returns everything that follows the first occurrence of $separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // Skip the separator itself.
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6701
6702
  /**
   * Returns everything that follows the last occurrence of $separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // Skip the separator itself.
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6733
6734
  /**
   * Returns everything that precedes the first occurrence of $separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // The separator position doubles as the length of the leading part.
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6765
6766
  /**
   * Returns everything that precedes the last occurrence of $separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // The separator position doubles as the length of the leading part.
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6797
6798
  /**
   * Returns the part of $str located by the first occurrence of $needle, as
   * delivered by UTF8::strstr() (the "$beforeNeedle" flag is handed through).
   *
   * An empty string is returned if $str or $needle is empty, or if
   * UTF8::strstr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
6830
6831
  /**
   * Returns the part of $str located by the last occurrence of $needle, as
   * delivered by UTF8::strrchr() (the "$beforeNeedle" flag is handed through).
   *
   * An empty string is returned if $str or $needle is empty, or if
   * UTF8::strrchr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
6858
6859
  /**
   * Wraps $str in $substring, i.e. puts $substring in front of and behind it.
   *
   * @param string $str
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6871
6872
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * A "word" is every run of non-whitespace characters. Words listed in
   * $ignore are compared strictly (case-sensitive) and returned untouched;
   * every other word is lower-cased and then gets an upper-case first letter.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Hand $encoding to both steps, so that lower- and upper-casing work on the same charset.
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6901
6902
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * INFO: The entries of $ignore are appended to the built-in "small words" list and are
   *       inserted into the regular expressions as they are (they are not quoted).
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // Built-in small words (regex fragments), extended by the caller-supplied ones.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // A string without any lowercase character is lower-cased first,
    // using the same $encoding as all other casing calls in this method.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind below tests for "…" although its inline comment speaks of a
    //               hyphen; the pattern is kept byte-for-byte - confirm against the referenced script.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7046
7047
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of $str are read as one big-endian number and written in base 2,
   * without leading zeros ("0" for an empty string or a string of NUL bytes only).
   *
   * INFO: The conversion is done digit by digit, so it stays exact for strings of any
   *       length (a float based conversion loses the low-order bits of big numbers).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    // Every hex digit maps to exactly four bits.
    static $NIBBLE_TO_BITS = [
        '0' => '0000',
        '1' => '0001',
        '2' => '0010',
        '3' => '0011',
        '4' => '0100',
        '5' => '0101',
        '6' => '0110',
        '7' => '0111',
        '8' => '1000',
        '9' => '1001',
        'a' => '1010',
        'b' => '1011',
        'c' => '1100',
        'd' => '1101',
        'e' => '1110',
        'f' => '1111',
    ];

    // "strtr()" with an array never re-scans already replaced text,
    // so the produced "0" / "1" characters are not translated again.
    $bits = \ltrim(\strtr(\bin2hex($str), $NIBBLE_TO_BITS), '0');

    return $bits === '' ? '0' : $bits;
  }
7060
7061
  /**
   * Split a string into its lines.
   *
   * Each "\r\n", "\r" or "\n" is exactly one line break, so blank lines are kept
   * as empty strings (unless they are filtered out via the optional parameters).
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // "\r\n" comes first in the alternation, so a Windows line break is never split in two.
    $lines = \preg_split("/\r\n|\r|\n/u", $str);

    // "preg_split()" fails e.g. for input that is not valid for the "u" modifier.
    if ($lines === false) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // Nothing to filter?
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
7096
7097
  /**
   * Convert a string into an array of words.
   *
   * INFO: The string is split with the words as captured delimiters, so without any
   *       filtering the result alternates between "text between words" and "word".
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ($str === '') {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // Nothing to filter?
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $parts;
    }

    $filtered = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    // Make sure that every remaining value is a string.
    foreach ($filtered as $key => $value) {
      $filtered[$key] = (string)$value;
    }

    return $filtered;
  }
7141
7142
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are handed to UTF8::to_ascii() unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7157
7158
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * A string that is not longer than $length is returned unchanged
   * (without $substring).
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // The parameter is not type-hinted, so normalize it first.
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    if ($length >= self::strlen($input, $encoding)) {
      return $input;
    }

    // Reserve room for the substring that gets appended.
    $keep = $length - self::strlen($substring, $encoding);

    // The cast makes the "false" result of "substr()" an explicit empty string.
    return (string)self::substr($input, 0, $keep, $encoding) . $substring;
  }
7191
7192
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * A string that is not longer than $length is returned unchanged
   * (without $substring).
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Reserve room for the substring that gets appended.
    $length -= self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $length, $encoding);
    if ($head === false) {
      return '';
    }

    // Was the last word cut? Look for a space right at the cut position.
    // (the loose comparison is intended: "false" and "0" are treated alike here)
    $spacePos = self::strpos($str, ' ', $length - 1, $encoding);
    if ($spacePos != $length) {
      // Go back to the last space within the truncated part.
      $lastSpacePos = self::strrpos($head, ' ', 0, $encoding);

      if ($lastSpacePos !== false || $spacePos !== false) {
        $head = self::substr($head, 0, (int)$lastSpacePos, $encoding);
      }
    }

    return $head . $substring;
  }
7235
7236
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * INFO: This is UTF8::str_delimit() with "_" as delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7250
7251
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * INFO: This is UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7265
7266
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are handed to UTF8::ucfirst() unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7281
7282
  /**
   * Counts number of words in the UTF-8 string.
   *
   * INFO: The work is done by UTF8::str_to_words(), which delivers the words
   *       at the odd indexes of its result.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    // Format 1: the plain list of words.
    if ($format === 1) {
      $words = [];
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // Format 2: the words, keyed by their offset within the string.
    if ($format === 2) {
      $wordsByOffset = [];
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $wordsByOffset[$offset] = $parts[$i];
        // Advance by the word itself plus the text that follows it.
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $wordsByOffset;
    }

    // Any other format: only the number of words.
    return (int)(($partCount - 1) / 2);
  }
7325
7326
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
   *       case-folded via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7347
7348
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are handed to UTF8::strstr() unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7365
7366
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are equal right away; otherwise both strings are
   * normalized (NFD) and compared byte-wise via "\strcmp()".
   *
   * INFO: If a string can not be normalized (e.g. because it is not valid UTF-8),
   *       its raw bytes are compared instead of handing a non-string value to
   *       "\strcmp()", which fails with a TypeError in strict mode.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
7385
7386
  /**
   * Find length of initial segment not matching mask.
   *
   * INFO: null is returned for an empty $charList and whenever the string
   *       (after $offset / $length were applied) is empty.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // Narrow the string down to the requested window first.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }

      $str = (string)$window;
    }

    if ($str === '') {
      return null;
    }

    // Capture (non-greedy) everything in front of the first char out of $charList.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // No char of the mask was found, so the whole string counts.
    return self::strlen($str);
  }
7420
7421
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are handed to UTF8::stristr() unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7438
7439
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted via UTF8::chr(), the results are joined in the given order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
7461
7462
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * INFO: The string is tested against every key of the internal BOM list.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // Only the BOM strings (the keys) are needed here, not their byte lengths.
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7481
7482
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // Clean the input before the tags are stripped, if requested.
    $input = ($cleanUtf8 === true ? self::clean($str) : $str);

    return \strip_tags($input, $allowable_tags);
  }
7514
7515
  /**
   * Remove every whitespace character from the string.
   *
   * Everything matched by the "[[:space:]]" character class in unicode mode ("/u") is dropped,
   * e.g. spaces, tabs and newline characters.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input without whitespace.
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // "preg_replace()" yields null on failure, the cast turns that into an empty string
    return (string)$stripped;
  }
7532
7533
  /**
   * Case-insensitive search for the position of the first occurrence of a string in another string.
   *
   * The lookup strategies are tried in this order: mbstring, intl (UTF-8 and non-negative offset
   * only), native "stripos()" for pure ASCII input and finally a case-folded UTF8::strpos().
   * A strategy that does not find the needle hands over to the next one.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string to search in.</p>
   * @param string $needle    <p>The string to search for.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> position of the first occurrence of needle in the
   *                   haystack,<br> or <strong>false</strong> if needle is not found
   *                   (also for an empty haystack or an empty needle).
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the
      // positions reported by "mb_strpos()" and "iconv_strpos()"
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isDefaultEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1. mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2. intl: "grapheme_stripos()" knows neither other encodings nor negative offsets
    //

    $intlIsUsable = $encoding === 'UTF-8'
                    &&
                    $offset >= 0
                    &&
                    self::$SUPPORT['intl'] === true;

    if ($intlIsUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3. ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // 4. vanilla php: case-fold both strings and search case-sensitive
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7606
7607
  /**
   * Case-insensitive search for the first occurrence of needle: returns all of haystack starting
   * from and including that occurrence to the end (or the part in front of it).
   *
   * The lookup strategies are tried in this order: mbstring, intl (UTF-8 only), native
   * "stristr()" for pure ASCII input and finally a unicode regular expression.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also for an empty haystack or an empty needle).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty at this point if the cleanup removed all of its bytes.
    // Compare against '' explicitly: a loose "!$needle" check is also true for the valid
    // needle "0" and would wrongly return the whole haystack for it.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture (non-greedy) everything in front of the first match
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // "self::strlen()" is documented as int|false, but the offset has to be an integer
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7685
7686
  /**
   * Get the length of a string in characters, not in bytes!
   *
   * The counting strategies are tried in this order: byte-length for "CP850" / "ASCII",
   * mbstring, iconv, intl (UTF-8 only), native "strlen()" for pure ASCII input and finally
   * a unicode regular expression.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isDefaultEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: every character is exactly one byte
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // invalid characters would falsify the result
      // of "mb_strlen()" and "\iconv_strlen()"
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'] === true;
    $hasIconv = self::$SUPPORT['iconv'] === true;

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1. mbstring
    //

    if ($hasMbstring) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 2. iconv
    //

    if ($hasIconv) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 3. intl: "grapheme_strlen()" can't handle other encodings
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    //
    // 4. ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // 5. vanilla php: count the matches of "any single character"
    //

    \preg_match_all('/./us', $str, $parts);
    $length = \count($parts[0]);

    // no match at all for a string that still contains bytes: the input could not be processed
    $couldNotCount = $length === 0 && isset($str[0]);

    return $couldNotCount ? false : $length;
  }
7801
7802
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int The number of bytes.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // With "mbstring.func_overload" in use the "mb_" functions are available,
    // so count via mbstring with an 8-BIT charset ...
    // ... otherwise the native function is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
7826
7827
  /**
   * Case-insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: both strings are case-folded and then handed over to UTF8::strnatcmp()
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
7848
7849
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return \strnatcmp($folded1, $folded2);
  }
7868
7869
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * INFO: both strings are case-folded and then handed over to UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
7892
7893
  /**
   * Compare the first n characters of two strings.
   *
   * INFO: both strings are cut via UTF8::substr() and then handed over to UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a failed "substr" (false) into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7914
7915
  /**
   * Search a string for the first occurrence of any character out of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
   *
   * @return string|false The rest of the haystack starting from the character found,
   *                      or false if none is found (also for an empty haystack or char_list).
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';

    if (!\preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // cut at the byte position of the matched character
    $bytePosition = (int)\strpos($haystack, $m[0]);

    return \substr($haystack, $bytePosition);
  }
7937
7938
  /**
   * Find position of first occurrence of string in a string.
   *
   * The lookup strategies are tried in this order: byte-search for "CP850" / "ASCII", mbstring,
   * intl (UTF-8 and non-negative offset only), iconv (non-negative offset only), native
   * "strpos()" for pure ASCII input and finally a byte-search whose result is converted
   * back into a character position. A strategy that does not find the needle hands over
   * to the next one.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found it returns false
   *                   (also for an empty haystack or an empty needle).
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    if ($offset < 0) {
      // A negative offset counts from the end of the haystack: translate it into the absolute
      // character index of the search start, so that the position returned below is relative
      // to the whole haystack and not to the tail which is searched.
      $haystackLength = $haystackIsAscii ? \strlen($haystack) : (int)self::strlen($haystack, $encoding);
      $offset = \max(0, $haystackLength + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle inside of the searched tail
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position: search start + number of
    // characters in front of the needle (0 characters for a match directly at the start)
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
  }
8088
8089
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found (or haystack / needle
   *                   is empty), it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // With "mbstring.func_overload" in use the "mb_" functions are available,
    // so search via mbstring with an 8-BIT charset ...
    // ... otherwise the native function is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
8122
8123
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * The lookup strategies are tried in this order: mbstring, native "strrchr()" (only for
   * "CP850" / "ASCII" without $before_needle), iconv and finally UTF8::strrpos().
   * The last two strategies search for the first character of the needle only.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isDefaultEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the
      // positions reported by "mb_strpos()" and "iconv_strpos()"
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1. mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // 2. binary || ascii only
    //

    $isSingleByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
    if ($before_needle === false && $isSingleByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // 3. iconv || vanilla php: both only look for the first character of the needle
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $pos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $pos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($pos === false) {
      return false;
    }

    if ($before_needle) {
      return self::substr($haystack, 0, $pos, $encoding);
    }

    return self::substr($haystack, $pos, null, $encoding);
  }
8236
8237
  /**
   * Reverses the order of the characters in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // "self::strlen()" is documented as int|false: a failure is treated
    // as a length of 0, so nothing is appended in that case
    $lastIndex = (int)self::strlen($str) - 1;

    // walk from the last character to the first one and append each of them
    $reversed = '';
    for ($index = $lastIndex; $index >= 0; --$index) {
      $reversed .= self::substr($str, $index, 1);
    }

    return $reversed;
  }
8258
8259
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Uses mbstring if available, otherwise the position of the first character of the
   * needle is looked up via UTF8::strripos() and the haystack is cut at that position.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isDefaultEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the
      // positions reported by "mb_strpos()" and "iconv_strpos()"
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1. mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // 2. vanilla php: only the first character of the needle is searched
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $pos = self::strripos($haystack, $needle, 0, $encoding);
    if ($pos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
  }
8329
8330
  /**
   * Case-insensitive lookup of the last occurrence of a needle inside a haystack.
   *
   * The work is delegated to the first usable backend, tried in this order: byte functions
   * (for the "CP850" / "ASCII" encodings), mbstring, intl (only for "UTF-8" and a non-negative
   * offset), byte functions again for pure ASCII input, and finally case-folding both strings
   * and calling UTF8::strrpos().
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is passed through UTF8::chr()
   *                              first.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support an integer $needle, so convert it up front
    $needleIsInt = $needle === (int)$needle && $needle >= 0;
    $needle = (string)($needleIsInt ? self::chr($needle) : $needle);

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: plain byte functions are sufficient
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $isUtf8 = $encoding === 'UTF-8';

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // next backend: intl ("grapheme_strripos()" can't handle other encodings or a negative offset)
    if ($isUtf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // pure ASCII input can be handled byte-wise
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    // last resort in vanilla php: case-fold both strings, then do a case-sensitive search
    return self::strrpos(
        self::strtocasefold($haystack, true, false, $encoding),
        self::strtocasefold($needle, true, false, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8435
8436
  /**
   * Byte-wise, case-insensitive lookup of the last occurrence of a needle inside a haystack.
   *
   * @param string $haystack <p>
   *                         The string in which the last occurrence of needle is searched.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack to start searching.
   *                         </p>
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // with "mbstring.func_overload" active the "mb_" functions are available,
    // so call them explicitly with the 8-bit "CP850" charset
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
  }
8471
8472
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Backends are tried in this order: byte functions (for the "CP850" / "ASCII" encodings), mbstring,
   * intl (only for "UTF-8" and an explicit non-negative offset), byte functions for pure ASCII input
   * (only with an explicit offset) and finally a byte search whose result is converted into a
   * character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.<br>null (the default) is forwarded as 0 to the byte and
   *                              mbstring backends.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      // strrpos_in_byte() declares "int $offset": with strict_types a null would raise a TypeError
      return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // never hand null to the int offset of mb_strrpos(); 0 means "search the whole string"
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // cut the haystack down to the searched part: skip the leading chars for a positive offset,
    // drop the trailing chars for a negative one (the result then needs no offset correction)
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped chars
    return ($offset ?? 0) + self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8607
8608
  /**
   * Byte-wise lookup of the last occurrence of a needle inside a haystack.
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found (or one of the strings is empty), it returns false.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strrpos($haystack, $needle, $offset);
    }

    // "mbstring.func_overload" is active, so "mb_" is available: use it with the 8-bit "CP850" charset
    return \mb_strrpos($haystack, $needle, $offset, 'CP850');
  }
8642
8643
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>If set (or if $length is set), only UTF8::substr($str, $offset, $length)
   *                         is examined.</p>
   * @param int|null $length [optional] <p>See $offset.</p>
   *
   * @return int
   *             The length (in characters) of the leading run of mask characters, 0 if there is none or if
   *             the examined string / the mask is empty.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // keep the matches in their own variable instead of overwriting the (string) subject with an array
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      // no leading mask character (0) or a regex error (false)
      return 0;
    }

    // UTF8::strlen() may report false; the cast keeps the declared int return type intact
    return (int)self::strlen($matches[0]);
  }
8670
8671
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Backends are tried in this order: byte functions (for the "CP850" / "ASCII" encodings), mbstring,
   * intl (only for "UTF-8"), byte functions for pure ASCII input and finally a regex based lookup.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found
   *                       (or if haystack / needle is empty).
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // capture (non-greedy) everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // UTF8::strlen() may report false, which must not reach the int $offset of UTF8::substr()
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
8774
8775
  /**
   * Byte-wise lookup of the first occurrence of a string within another.
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end.
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found or one of the strings is empty.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;
    if (!$overloadActive) {
      return \strstr($haystack, $needle, $before_needle);
    }

    // "mbstring.func_overload" is active, so "mb_" is available: use it with the 8-bit "CP850" charset
    return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
  }
8814
8815
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first run through UTF8::fixStrCaseHelper() and then converted with
   * UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $prepared = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict boolean true selects the lowercase variant
    return $lower === true
        ? self::strtolower($prepared, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
  }
8854
8855
  /**
   * Make a string lowercase.
   *
   * Without $lang the conversion is done by mb_strtolower(). With $lang and the intl extension the
   * "<lang>-Lower" transliterator is used ("Any-Lower" plus a warning if that id is unknown); with
   * $lang but without intl a warning is raised and mb_strtolower() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid UTF-8 sequences before the string is handed to the conversion functions
      $str = self::clean($str);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    // no language given: plain conversion (always fallback via symfony polyfill)
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      // always fallback via symfony polyfill
      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    $idIsKnown = \in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true);
    if (!$idIsKnown) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      // use the language independent transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
8917
8918
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
8930
8931
  /**
   * Make a string uppercase.
   *
   * Without $lang the conversion is done by mb_strtoupper(). With $lang and the intl extension the
   * "<lang>-Upper" transliterator is used ("Any-Upper" plus a warning if that id is unknown); with
   * $lang but without intl a warning is raised and mb_strtoupper() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid UTF-8 sequences before the string is handed to the conversion functions
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the language specific transliterator id is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      // report the problem under the name of this method (it used to name strtolower())
      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8993
8994
  /**
   * Translate characters or replace sub-strings.
   *
   * With $to given, $from and $to are both split via UTF8::str_split(), cut to the shorter of the two
   * lengths and used as a character-to-character map. Without $to, a string $from is removed from
   * $str and any other $from is handed to \strtr() as the replacement map.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to translate when source and target are identical
    if ($to === $from) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // surplus characters on either side have no partner and are dropped
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
9038
9039
  /**
   * Return the width of a string.
   *
   * With mbstring the result comes from mb_strwidth(). Otherwise every character matched by the
   * wide-character class below counts twice and every remaining character counts once.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    // vanilla php: the regex below works on UTF-8 only
    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip all wide characters and let preg_replace() count them
    $wideCount = 0;
    $narrowOnly = (string)preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    // wide characters take two columns, the remaining ones a single column
    return ($wideCount << 1) + self::strlen($narrowOnly, 'UTF-8');
  }
9089
9090
  /**
9091
   * Get part of a string.
9092
   *
9093
   * @link http://php.net/manual/en/function.mb-substr.php
9094
   *
9095
   * @param string $str       <p>The string being checked.</p>
9096
   * @param int    $offset    <p>The first position used in str.</p>
9097
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9098
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9099
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9100
   *
9101
   * @return string|false
9102
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9103
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9104
   *                      characters long, <b>FALSE</b> will be returned.
9105
   */
9106 392
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9107
  {
9108 392
    if ('' === $str) {
9109 19
      return '';
9110
    }
9111
9112
    // Empty string
9113 386
    if ($length === 0) {
9114 14
      return '';
9115
    }
9116
9117 383
    if ($cleanUtf8 === true) {
9118
      // iconv and mbstring are not tolerant to invalid encoding
9119
      // further, their behaviour is inconsistent with that of PHP's substr
9120 2
      $str = self::clean($str);
9121
    }
9122
9123
    // Whole string
9124 383
    if (!$offset && $length === null) {
9125 38
      return $str;
9126
    }
9127
9128 354
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9129 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9130
    }
9131
9132 354
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9133
      self::checkForSupport();
9134
    }
9135
9136
    //
9137
    // fallback for binary || ascii only
9138
    //
9139
9140
    if (
9141 354
        $encoding === 'CP850'
9142
        ||
9143 354
        $encoding === 'ASCII'
9144
    ) {
9145 3
      return self::substr_in_byte($str, $offset, $length);
9146
    }
9147
9148
    //
9149
    // fallback via mbstring
9150
    //
9151
9152 351
    if (self::$SUPPORT['mbstring'] === true) {
9153 351
      return \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9154
    }
9155
9156
    // otherwise we need the string-length and can't fake it via "2147483647"
9157
    $str_length = 0;
9158
    if ($offset || $length === null) {
9159
      $str_length = self::strlen($str, $encoding);
9160
    }
9161
9162
    // e.g.: invalid chars + mbstring not installed
9163
    if ($str_length === false) {
9164
      return false;
9165
    }
9166
9167
    // Empty string
9168
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9169
      return '';
9170
    }
9171
9172
    // Impossible
9173
    if ($offset && $offset > $str_length) {
9174
      // "false" is the php native return type here,
9175
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9176
      return '';
9177
9178
    }
9179
9180
    if ($length === null) {
9181
      $length = (int)$str_length;
9182
    } else {
9183
      $length = (int)$length;
9184
    }
9185
9186
    if (
9187
        $encoding !== 'UTF-8'
9188
        &&
9189
        self::$SUPPORT['mbstring'] === false
9190
    ) {
9191
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9192
    }
9193
9194
    //
9195
    // fallback via intl
9196
    //
9197
9198
    if (
9199
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9200
        &&
9201
        $offset >= 0 // grapheme_substr() can't handle negative offset
9202
        &&
9203
        self::$SUPPORT['intl'] === true
9204
    ) {
9205
      $returnTmp = \grapheme_substr($str, $offset, $length);
9206
      if ($returnTmp !== false) {
9207
        return $returnTmp;
9208
      }
9209
    }
9210
9211
    //
9212
    // fallback via iconv
9213
    //
9214
9215
    if (
9216
        $length >= 0 // "iconv_substr()" can't handle negative length
9217
        &&
9218
        self::$SUPPORT['iconv'] === true
9219
    ) {
9220
      $returnTmp = \iconv_substr($str, $offset, $length);
9221
      if ($returnTmp !== false) {
9222
        return $returnTmp;
9223
      }
9224
    }
9225
9226
    //
9227
    // fallback for ascii only
9228
    //
9229
9230
    if (self::is_ascii($str)) {
9231
      return \substr($str, $offset, $length);
9232
    }
9233
9234
    //
9235
    // fallback via vanilla php
9236
    //
9237
9238
    // split to array, and remove invalid characters
9239
    $array = self::split($str);
9240
9241
    // extract relevant part, and join to make string again
9242
    return \implode('', \array_slice($array, $offset, $length));
9243
  }
9244
9245
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is first reduced via UTF8::substr() and $str2 is
   * reduced to the same number of characters; the two results are then handed to
   * UTF8::strcasecmp() or UTF8::strcmp().
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. If null (and the offset is 0),
   *                                     both strings are compared as they are.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if ($offset !== 0 || $length !== null) {
      $str1Tmp = self::substr($str1, $offset, $length);
      $str1 = ($str1Tmp === false) ? '' : (string)$str1Tmp;

      // UTF8::strlen() may return false; this file uses strict_types, so the value
      // is normalized to an int before it is used as the length argument.
      $compareLength = self::strlen($str1);
      if ($compareLength === false) {
        $compareLength = 0;
      }

      $str2Tmp = self::substr($str2, 0, (int)$compareLength);
      $str2 = ($str2Tmp === false) ? '' : (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9288
9289
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If omitted, the search runs to the end of the haystack.
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false The number of occurrences, or false if the haystack or the needle is an empty
   *                   string, or if the length of the haystack cannot be determined.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // Measure the haystack in the same encoding that is used for cutting it below,
        // otherwise the default length can be wrong for non UTF-8 input.
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window before counting
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences are stripped from both strings before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9382
9383
  /**
   * Count the number of substring occurrences, working on bytes.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string being found.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. If omitted, the search runs to the end of the haystack.
   *                         </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string (0 if the haystack or the needle is empty).
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      // An explicit null length is only accepted by "substr_count()" since PHP 8.0,
      // so the argument is left out when no length was given.
      if ($length === null) {
        return \substr_count($haystack, $needle, $offset);
      }

      return \substr_count($haystack, $needle, $offset, $length);
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // NOTE(review): UTF8::strlen() is called without an encoding here; if that counts
        // characters instead of bytes, multi-byte haystacks are cut too short - confirm.
        $lengthTmp = self::strlen($haystack);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
  }
9457
9458
  /**
   * Returns how often $substring occurs in $str.
   *
   * The comparison is case-sensitive by default; pass false as $caseSensitive
   * to case-fold both strings before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int 0 if one of the strings is empty, otherwise the result of UTF8::substr_count() as int.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $str || '' === $substring) {
      return 0;
    }

    // Backward-compatibility shim: a non-boolean third argument is taken as the encoding.
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $foldCase = !$caseSensitive;
    if ($foldCase) {
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9488
9489
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() may return false, which must not be used as an (int) offset.
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);

    return ($haystackTmp === false) ? '' : (string)$haystackTmp;
  }
9517
9518
  /**
   * Get a part of a string, processed in bytes.
   *
   * @param string $str    <p>The string being checked.</p>
   * @param int    $offset <p>The first position used in str.</p>
   * @param int    $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters, as returned by the underlying
   *                      "substr()" / "mb_substr()" call.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to cut from, or nothing requested
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no length: the whole string is wanted
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // a missing length is replaced by a very large number instead of measuring the string
    $maxLength = $length ?? 2147483647;

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
    }

    return \substr($str, $offset, $maxLength);
  }
9557
9558
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() may return false; subtracting such a value would silently
    // produce a wrong length, so the haystack is left untouched in that case.
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);

    return ($haystackTmp === false) ? '' : (string)$haystackTmp;
  }
9586
9587
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() may return false, which must not be used as an (int) offset.
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);

    return ($haystackTmp === false) ? '' : (string)$haystackTmp;
  }
9615
9616
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the function is applied to every element and an array is returned.
   * In that case a missing $length is turned into 0 for every element.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given (and $str is a string), then it will
   *                                          default to the length of the string; i.e. end the replacing at the end
   *                                          of string. Of course, if length is zero then this function will have the
   *                                          effect of inserting replacement into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // one replacement per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $num)
          : \array_pad([$replacement], $num, $replacement);

      // one offset per input string; values that are not integers become 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $num)
        );
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // one length per input string
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($num) {
              if (null === $value) {
                return 0;
              }

              // values that are not integers are replaced by the number of input strings
              return (int)$value === $value ? $value : $num;
            },
            \array_slice($length, 0, $num)
        );
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, element by element
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first replacement
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // ASCII-only input can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into arrays of characters
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $lengthTmp = self::strlen($str);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    // replace the requested range of characters and join everything again
    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
9722
9723
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // UTF8::strlen() may return false; subtracting such a value would silently
    // produce a wrong length, so the haystack is left untouched in that case.
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);

    return ($haystackTmp === false) ? '' : (string)$haystackTmp;
  }
9751
9752
  /**
   * Returns a case swapped version of the string.
   *
   * The result is built by XOR-ing the lower-cased string, the upper-cased string and the
   * input string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the case conversions
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // NOTE(review): "^" on strings works byte by byte, so this presumably relies on the lower-
    // and upper-cased variants having the same byte length as the input - confirm for
    // characters whose case conversion changes their length.
    return (string)($lower ^ $upper ^ $str);
  }
9779
9780
  /**
   * Checks whether "mb_strlen()" or "iconv()" exists although the matching PHP extension
   * ("mbstring" / "iconv") is not loaded, i.e. the function presumably comes from a polyfill.
   *
   * @return bool
   *              <strong>true</strong> if at least one of the two functions exists without its
   *              extension, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringPolyfilled || $iconvPolyfilled;
  }
9803
9804
  /**
   * Replaces every tab character in the string with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string The string with all tabs replaced.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9814
9815
  /**
   * Converts the string to title case via "mb_convert_case()" with MB_CASE_TITLE:
   * the first character of each word becomes uppercase, all other characters lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_convert_case()" is called unconditionally
    // (the original note says: always fallback via symfony polyfill)
    return \mb_convert_case($str, MB_CASE_TITLE, $encoding);
  }
9833
9834
  /**
   * Alias for "UTF8::to_ascii()"; all arguments are passed through unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed as second argument to "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed as third argument to "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9851
9852
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is passed through unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9867
9868
  /**
   * Alias for "UTF8::to_latin1()"; the argument is passed through unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9883
9884
  /**
   * Alias for "UTF8::to_utf8()"; the argument is passed through unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9899
9900
  /**
9901
   * Convert a string into ASCII.
9902
   *
9903
   * @param string $str     <p>The input string.</p>
9904
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9905
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9906
   *                        performance</p>
9907
   *
9908
   * @return string
9909
   */
9910 37
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9911
  {
9912 37
    static $UTF8_TO_ASCII;
9913
9914 37
    if ('' === $str) {
9915 3
      return '';
9916
    }
9917
9918
    // check if we only have ASCII, first (better performance)
9919 34
    if (self::is_ascii($str) === true) {
9920 6
      return $str;
9921
    }
9922
9923 29
    $str = self::clean(
9924 29
        $str,
9925 29
        true,
9926 29
        true,
9927 29
        true,
9928 29
        false,
9929 29
        true,
9930 29
        true
9931
    );
9932
9933
    // check again, if we only have ASCII, now ...
9934 29
    if (self::is_ascii($str) === true) {
9935 12
      return $str;
9936
    }
9937
9938 18
    if ($strict === true) {
9939
9940 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9941
        self::checkForSupport();
9942
      }
9943
9944 1
      if (self::$SUPPORT['intl'] === true) {
9945
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
9946
        /** @noinspection PhpComposerExtensionStubsInspection */
9947 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
9948
9949
        // check again, if we only have ASCII, now ...
9950 1
        if (self::is_ascii($str) === true) {
9951 1
          return $str;
9952
        }
9953
9954
      }
9955
    }
9956
9957 18
    if (self::$ORD === null) {
9958
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9959
    }
9960
9961 18
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
9962 18
    $chars = $ar[0];
9963 18
    $ord = null;
9964 18
    foreach ($chars as &$c) {
9965
9966 18
      $ordC0 = self::$ORD[$c[0]];
9967
9968 18
      if ($ordC0 >= 0 && $ordC0 <= 127) {
9969 14
        continue;
9970
      }
9971
9972 18
      $ordC1 = self::$ORD[$c[1]];
9973
9974
      // ASCII - next please
9975 18
      if ($ordC0 >= 192 && $ordC0 <= 223) {
9976 16
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
9977
      }
9978
9979 18
      if ($ordC0 >= 224) {
9980 7
        $ordC2 = self::$ORD[$c[2]];
9981
9982 7
        if ($ordC0 <= 239) {
9983 6
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
9984
        }
9985
9986 7
        if ($ordC0 >= 240) {
9987 2
          $ordC3 = self::$ORD[$c[3]];
9988
9989 2
          if ($ordC0 <= 247) {
9990 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
9991
          }
9992
9993 2
          if ($ordC0 >= 248) {
9994
            $ordC4 = self::$ORD[$c[4]];
9995
9996
            if ($ordC0 <= 251) {
9997
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
9998
            }
9999
10000
            if ($ordC0 >= 252) {
10001
              $ordC5 = self::$ORD[$c[5]];
10002
10003
              if ($ordC0 <= 253) {
10004
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10005
              }
10006
            }
10007
          }
10008
        }
10009
      }
10010
10011 18
      if ($ordC0 === 254 || $ordC0 === 255) {
10012
        $c = $unknown;
10013
        continue;
10014
      }
10015
10016 18
      if ($ord === null) {
10017
        $c = $unknown;
10018
        continue;
10019
      }
10020
10021 18
      $bank = $ord >> 8;
10022 18
      if (!isset($UTF8_TO_ASCII[$bank])) {
10023 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10024 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10025 2
          $UTF8_TO_ASCII[$bank] = [];
10026
        }
10027
      }
10028
10029 18
      $newchar = $ord & 255;
10030
10031 18
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10032
10033
        // keep for debugging
10034
        /*
10035
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10036
        echo "char: " . $c . "\n";
10037
        echo "ord: " . $ord . "\n";
10038
        echo "newchar: " . $newchar . "\n";
10039
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10040
        echo "bank:" . $bank . "\n\n";
10041
        */
10042
10043 17
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10044
      } else {
10045
10046
        // keep for debugging missing chars
10047
        /*
10048
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10049
        echo "char: " . $c . "\n";
10050
        echo "ord: " . $ord . "\n";
10051
        echo "newchar: " . $newchar . "\n";
10052
        echo "bank:" . $bank . "\n\n";
10053
        */
10054
10055 18
        $c = $unknown;
10056
      }
10057
    }
10058
10059 18
    return \implode('', $chars);
10060
  }
10061
10062
  /**
   * Interpret a value as a boolean.
   *
   * The value is cast to a string first; an empty string is false. The
   * lower-cased string is then compared against the keywords "true", "1",
   * "on", "yes" (=> true) and "false", "0", "off", "no" (=> false). Any other
   * numeric string is true only if it is greater than zero. Everything else
   * is true unless "UTF8::trim()" reduces it to a falsy string.
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;
    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keyword = \strtolower($value);

    if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    // numbers that are not one of the keywords: only positive values count as true
    if (\is_numeric($value)) {
      return (float)$value > 0;
    }

    return (bool)self::trim($value);
  }
10101
10102
  /**
   * Convert a string (or every element of an array) into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are processed element by element (recursively) and keep their keys.
   * Everything else is cast to a string and handed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // array_map() with a single array keeps the original keys
      return \array_map(
          static function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $input = (string)$str;

    return $input === '' ? '' : self::utf8_decode($input);
  }
10126
10127
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
10140
10141
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * The input is scanned byte by byte: a lead byte followed by the matching
   * number of continuation bytes (0x80-0xBF) is copied unchanged, every other
   * byte >= 0x80 is converted on its own via "UTF8::to_utf8_convert_helper()".
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      // array_map() with a single array keeps the original keys
      return \array_map(
          static function ($item) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // 0x80-0xBF: stray continuation byte, needs conversion; below that: copy as it is
        $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
        $pos++;

        continue;
      }

      // number of continuation bytes this lead byte announces (0 => no usable lead byte)
      if ($lead <= "\xDF") {
        $expected = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $expected = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $expected = 3; // looks like 4 bytes UTF8
      } else {
        $expected = 0; // doesn't look like UTF8, but should be converted
      }

      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0;
      for ($offset = 1; $offset <= $expected; $offset++) {
        // bytes behind the end of the string count as "\x00" and therefore never match
        $next = $pos + $offset >= $byteCount ? "\x00" : $str[$pos + $offset];
        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;

          break;
        }

        $sequence .= $next;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $result .= $sequence;
        $pos += $expected + 1;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are inspected on their own
        $result .= self::to_utf8_convert_helper($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($hit) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $hit[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
10250
10251
  /**
   * Convert one byte that is not part of a valid UTF-8 sequence into UTF-8.
   *
   * The byte is looked up in the "win1252_to_utf8" table first. If there is no
   * entry, the byte value is treated as the code point and encoded as a
   * two-byte UTF-8 sequence (110000xx 10xxxxxx).
   *
   * @param int|string $input <p>The byte to convert; it is used as key into the "ord" table.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables
    // NOTE(review): the embedded analysis notes say getData() can also return false - confirm the data files always exist
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Upper two bits go into the lead byte. Use an integer shift instead of "/ 64":
    // the division yields a float (e.g. 129 / 64), and a float with a fractional
    // part is not a valid array offset (deprecated implicit conversion since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";

    // lower six bits go into the continuation byte (bitwise operators on strings work byte-wise)
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10284
10285
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped. If omitted (or empty), all
   *                      characters of the unicode categories "Z" (separator) and "C" (other / control)
   *                      are stripped.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a valid character list, but it is falsy in PHP: without this special
    // case a call like trim('0010', '0') would strip whitespace instead of zeros.
    if ($chars === 0) {
      $chars = '0';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // the list ends up inside a character class, so every special char must be escaped
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10314
10315
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the remainder; only the
   * first character is passed through "UTF8::strtoupper()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything behind the first character ("false" means: there is nothing behind it)
    $tail = self::substr($str, 1, null, $encoding);
    $head = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($head, $encoding, $cleanUtf8) . ($tail === false ? '' : $tail);
  }
10345
10346
  /**
   * Alias of "UTF8::ucfirst()": only the first char of the whole string is uppercased.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10361
10362
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are left untouched (compared case-sensitive).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: "0" is falsy in PHP, but it is a valid input
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function is only an option if neither a charlist nor exceptions are used
    $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty chunks only - a word like "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10430
10431
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, until the string does not change anymore.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, so that they are decoded below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // one pass is enough, unless the caller asked for repeated decoding
    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
10479
10480
  /**
10481
   * Return an array that maps "urlencoded" win1252 bytes ("%20" ... "%FF") to their UTF-8 characters.
10482
   *
10483
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10484
   *
10485
   * @return string[]
10486
   */
10487 2
  public static function urldecode_fix_win1252_chars(): array
10488
  {
10489
    return [
10490 2
        '%20' => ' ',
10491
        '%21' => '!',
10492
        '%22' => '"',
10493
        '%23' => '#',
10494
        '%24' => '$',
10495
        '%25' => '%',
10496
        '%26' => '&',
10497
        '%27' => "'",
10498
        '%28' => '(',
10499
        '%29' => ')',
10500
        '%2A' => '*',
10501
        '%2B' => '+',
10502
        '%2C' => ',',
10503
        '%2D' => '-',
10504
        '%2E' => '.',
10505
        '%2F' => '/',
10506
        '%30' => '0',
10507
        '%31' => '1',
10508
        '%32' => '2',
10509
        '%33' => '3',
10510
        '%34' => '4',
10511
        '%35' => '5',
10512
        '%36' => '6',
10513
        '%37' => '7',
10514
        '%38' => '8',
10515
        '%39' => '9',
10516
        '%3A' => ':',
10517
        '%3B' => ';',
10518
        '%3C' => '<',
10519
        '%3D' => '=',
10520
        '%3E' => '>',
10521
        '%3F' => '?',
10522
        '%40' => '@',
10523
        '%41' => 'A',
10524
        '%42' => 'B',
10525
        '%43' => 'C',
10526
        '%44' => 'D',
10527
        '%45' => 'E',
10528
        '%46' => 'F',
10529
        '%47' => 'G',
10530
        '%48' => 'H',
10531
        '%49' => 'I',
10532
        '%4A' => 'J',
10533
        '%4B' => 'K',
10534
        '%4C' => 'L',
10535
        '%4D' => 'M',
10536
        '%4E' => 'N',
10537
        '%4F' => 'O',
10538
        '%50' => 'P',
10539
        '%51' => 'Q',
10540
        '%52' => 'R',
10541
        '%53' => 'S',
10542
        '%54' => 'T',
10543
        '%55' => 'U',
10544
        '%56' => 'V',
10545
        '%57' => 'W',
10546
        '%58' => 'X',
10547
        '%59' => 'Y',
10548
        '%5A' => 'Z',
10549
        '%5B' => '[',
10550
        '%5C' => '\\',
10551
        '%5D' => ']',
10552
        '%5E' => '^',
10553
        '%5F' => '_',
10554
        '%60' => '`',
10555
        '%61' => 'a',
10556
        '%62' => 'b',
10557
        '%63' => 'c',
10558
        '%64' => 'd',
10559
        '%65' => 'e',
10560
        '%66' => 'f',
10561
        '%67' => 'g',
10562
        '%68' => 'h',
10563
        '%69' => 'i',
10564
        '%6A' => 'j',
10565
        '%6B' => 'k',
10566
        '%6C' => 'l',
10567
        '%6D' => 'm',
10568
        '%6E' => 'n',
10569
        '%6F' => 'o',
10570
        '%70' => 'p',
10571
        '%71' => 'q',
10572
        '%72' => 'r',
10573
        '%73' => 's',
10574
        '%74' => 't',
10575
        '%75' => 'u',
10576
        '%76' => 'v',
10577
        '%77' => 'w',
10578
        '%78' => 'x',
10579
        '%79' => 'y',
10580
        '%7A' => 'z',
10581
        '%7B' => '{',
10582
        '%7C' => '|',
10583
        '%7D' => '}',
10584
        '%7E' => '~',
10585
        '%7F' => '',
10586
        '%80' => '`',
10587
        '%81' => '',
10588
        '%82' => '‚',
10589
        '%83' => 'ƒ',
10590
        '%84' => '„',
10591
        '%85' => '…',
10592
        '%86' => '†',
10593
        '%87' => '‡',
10594
        '%88' => 'ˆ',
10595
        '%89' => '‰',
10596
        '%8A' => 'Š',
10597
        '%8B' => '‹',
10598
        '%8C' => 'Œ',
10599
        '%8D' => '',
10600
        '%8E' => 'Ž',
10601
        '%8F' => '',
10602
        '%90' => '',
10603
        '%91' => '‘',
10604
        '%92' => '’',
10605
        '%93' => '“',
10606
        '%94' => '”',
10607
        '%95' => '•',
10608
        '%96' => '–',
10609
        '%97' => '—',
10610
        '%98' => '˜',
10611
        '%99' => '™',
10612
        '%9A' => 'š',
10613
        '%9B' => '›',
10614
        '%9C' => 'œ',
10615
        '%9D' => '',
10616
        '%9E' => 'ž',
10617
        '%9F' => 'Ÿ',
10618
        '%A0' => '',
10619
        '%A1' => '¡',
10620
        '%A2' => '¢',
10621
        '%A3' => '£',
10622
        '%A4' => '¤',
10623
        '%A5' => '¥',
10624
        '%A6' => '¦',
10625
        '%A7' => '§',
10626
        '%A8' => '¨',
10627
        '%A9' => '©',
10628
        '%AA' => 'ª',
10629
        '%AB' => '«',
10630
        '%AC' => '¬',
10631
        '%AD' => '',
10632
        '%AE' => '®',
10633
        '%AF' => '¯',
10634
        '%B0' => '°',
10635
        '%B1' => '±',
10636
        '%B2' => '²',
10637
        '%B3' => '³',
10638
        '%B4' => '´',
10639
        '%B5' => 'µ',
10640
        '%B6' => '¶',
10641
        '%B7' => '·',
10642
        '%B8' => '¸',
10643
        '%B9' => '¹',
10644
        '%BA' => 'º',
10645
        '%BB' => '»',
10646
        '%BC' => '¼',
10647
        '%BD' => '½',
10648
        '%BE' => '¾',
10649
        '%BF' => '¿',
10650
        '%C0' => 'À',
10651
        '%C1' => 'Á',
10652
        '%C2' => 'Â',
10653
        '%C3' => 'Ã',
10654
        '%C4' => 'Ä',
10655
        '%C5' => 'Å',
10656
        '%C6' => 'Æ',
10657
        '%C7' => 'Ç',
10658
        '%C8' => 'È',
10659
        '%C9' => 'É',
10660
        '%CA' => 'Ê',
10661
        '%CB' => 'Ë',
10662
        '%CC' => 'Ì',
10663
        '%CD' => 'Í',
10664
        '%CE' => 'Î',
10665
        '%CF' => 'Ï',
10666
        '%D0' => 'Ð',
10667
        '%D1' => 'Ñ',
10668
        '%D2' => 'Ò',
10669
        '%D3' => 'Ó',
10670
        '%D4' => 'Ô',
10671
        '%D5' => 'Õ',
10672
        '%D6' => 'Ö',
10673
        '%D7' => '×',
10674
        '%D8' => 'Ø',
10675
        '%D9' => 'Ù',
10676
        '%DA' => 'Ú',
10677
        '%DB' => 'Û',
10678
        '%DC' => 'Ü',
10679
        '%DD' => 'Ý',
10680
        '%DE' => 'Þ',
10681
        '%DF' => 'ß',
10682
        '%E0' => 'à',
10683
        '%E1' => 'á',
10684
        '%E2' => 'â',
10685
        '%E3' => 'ã',
10686
        '%E4' => 'ä',
10687
        '%E5' => 'å',
10688
        '%E6' => 'æ',
10689
        '%E7' => 'ç',
10690
        '%E8' => 'è',
10691
        '%E9' => 'é',
10692
        '%EA' => 'ê',
10693
        '%EB' => 'ë',
10694
        '%EC' => 'ì',
10695
        '%ED' => 'í',
10696
        '%EE' => 'î',
10697
        '%EF' => 'ï',
10698
        '%F0' => 'ð',
10699
        '%F1' => 'ñ',
10700
        '%F2' => 'ò',
10701
        '%F3' => 'ó',
10702
        '%F4' => 'ô',
10703
        '%F5' => 'õ',
10704
        '%F6' => 'ö',
10705
        '%F7' => '÷',
10706
        '%F8' => 'ø',
10707
        '%F9' => 'ù',
10708
        '%FA' => 'ú',
10709
        '%FB' => 'û',
10710
        '%FC' => 'ü',
10711
        '%FD' => 'ý',
10712
        '%FE' => 'þ',
10713
        '%FF' => 'ÿ',
10714
    ];
10715
  }
10716
10717
  /**
10718
   * Decodes an UTF-8 string to ISO-8859-1.
10719
   *
10720
   * @param string $str <p>The input string.</p>
10721
   * @param bool   $keepUtf8Chars
10722
   *
10723
   * @return string
10724
   */
10725 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10726
  {
10727 14
    if ('' === $str) {
10728 5
      return '';
10729
    }
10730
10731 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10732 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10733
10734 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10735
10736 1
      if (self::$WIN1252_TO_UTF8 === null) {
10737
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10738
      }
10739
10740 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10740
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10741 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10741
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10742
    }
10743
10744
    /** @noinspection PhpInternalEntityUsedInspection */
10745 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10746
10747 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10748
      self::checkForSupport();
10749
    }
10750
10751
    // save for later comparison
10752 14
    $str_backup = $str;
10753 14
    $len = self::strlen_in_byte($str);
10754
10755 14
    if (self::$ORD === null) {
10756
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10757
    }
10758
10759 14
    if (self::$CHR === null) {
10760
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10761
    }
10762
10763 14
    $noCharFound = '?';
10764
    /** @noinspection ForeachInvariantsInspection */
10765 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10766 14
      switch ($str[$i] & "\xF0") {
10767 14
        case "\xC0":
10768 12
        case "\xD0":
10769 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10770 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10771 14
          break;
10772
10773
        /** @noinspection PhpMissingBreakStatementInspection */
10774 12
        case "\xF0":
10775
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10776 12
        case "\xE0":
10777 10
          $str[$j] = $noCharFound;
10778 10
          $i += 2;
10779 10
          break;
10780
10781
        default:
10782 12
          $str[$j] = $str[$i];
10783
      }
10784
    }
10785
10786 14
    $return = self::substr_in_byte($str, 0, $j);
10787 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10788
      $return = '';
10789
    }
10790
10791
    if (
10792 14
        $keepUtf8Chars === true
10793
        &&
10794 14
        self::strlen($return) >= self::strlen($str_backup)
10795
    ) {
10796 2
      return $str_backup;
10797
    }
10798
10799 14
    return $return;
10800
  }
10801
10802
  /**
10803
   * Encodes an ISO-8859-1 string to UTF-8.
10804
   *
10805
   * @param string $str <p>The input string.</p>
10806
   *
10807
   * @return string
10808
   */
10809 14
  public static function utf8_encode(string $str): string
10810
  {
10811 14
    if ('' === $str) {
10812 13
      return '';
10813
    }
10814
10815 14
    $str = \utf8_encode($str);
10816
10817
    // the polyfill maybe return false
10818
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
10819 14
    if ($str === false) {
10820
      return '';
10821
    }
10822
10823 14
    if (false === \strpos($str, "\xC2")) {
10824 6
      return $str;
10825
    }
10826
10827 12
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
10828 12
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;
10829
10830 12
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {
10831
10832 1
      if (self::$WIN1252_TO_UTF8 === null) {
10833
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10834
      }
10835
10836 1
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10836
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10837 1
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10837
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10838
    }
10839
10840 12
    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
10841
  }
10842
10843
  /**
   * Deprecated alias: hands the string to "UTF8::fix_simple_utf8()" and returns its result unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The value returned by "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10856
10857
  /**
   * Gives access to the table of utf8 whitespace characters kept by this class.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[]
   *                 The whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10872
10873
  /**
   * Cuts a string down to a maximum number of words.
   *
   * Leading whitespace is part of the match and is therefore kept. When the string is
   * actually shortened, trailing whitespace is trimmed and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>The input itself if nothing had to be removed, otherwise the shortened string plus
   *                $strAddOn.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // nothing to do for empty input or a non-positive limit
    if ('' === $str || $limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    // no match at all: hand back the input untouched
    if (!isset($matches[0])) {
      return $str;
    }

    // the match already covers the whole string: nothing was cut off
    if (self::strlen($str) === self::strlen($matches[0])) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
10904
10905
  /**
   * Wraps a string to a given number of characters.
   *
   * The wrapping itself is delegated to PHP's native wordwrap(), but it runs on a
   * placeholder string that holds exactly one ASCII byte per entry returned by
   * self::split() (presumably one entry per character). The break positions found
   * in the placeholder are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string The given string wrapped at the specified column; an empty string if
   *                $str or $break is empty.
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = \explode($break, $str);
    $segmentCount = $segments === false ? 0 : \count($segments);

    // $chars collects the real characters (and the original breaks as single entries),
    // $mask mirrors it: '#' for a break, ' ' for a space, '?' for anything else.
    $chars = [];
    $mask = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($segmentIndex = 0; $segmentIndex < $segmentCount; ++$segmentIndex) {

      // re-insert the break that explode() removed between two segments
      if ($segmentIndex !== 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$segmentIndex];
      unset($segments[$segmentIndex]);

      if ($segment === null) {
        continue;
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go, marking each one with '#'
    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);
      if ($breakPos === false) {
        break;
      }

      // copy every character that sits in front of the current break marker
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // the marker replaced an original break or a space: drop that character
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left behind the last marker is appended as-is
    return $wrapped . \implode('', $chars);
  }
10975
10976
  /**
   * Wraps every line of the string on its own and joins the results with "\n".
   *
   * The input is split on "\r\n", "\r" and "\n". Each line is wrapped via
   * self::wordwrap() with its default break ("\n") and without cutting long words.
   * A "\n" is appended after every line, including the last one.
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width used for wrapping each line.</p>
   *
   * @return string <p>The wrapped lines; an empty string if the input could not be split.</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // Call the wrapper of this class explicitly: an unqualified wordwrap() falls back
      // to PHP's global function, which counts bytes and so wraps multi-byte text too early.
      $wrapped .= self::wordwrap($line, $limit) . "\n";
    }

    return $wrapped;
  }
11000
11001
  /**
   * Gives access to the list of Unicode White Space characters kept by this class.
   *
   * @return string[] An array with numeric code point as key and White Space Character as value.
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11010
11011
11012
}
11013