Passed
Push — master ( 32fce0...267d99 )
by Lars
20:52 queued 18:23
created

UTF8::chr()   F

Complexity

Conditions 21
Paths 264

Size

Total Lines 88
Code Lines 53

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 94.7723

Importance

Changes 0
Metric Value
cc 21
eloc 53
nc 264
nop 2
dl 0
loc 88
ccs 22
cts 49
cp 0.449
crap 94.7723
rs 2.5333
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // The UTF-8 BOM bytes (EF BB BF) read as WINDOWS-1252 and re-encoded as UTF-8.
      // Written as explicit bytes so the key cannot be lost by editors or tooling;
      // an empty key here would be meaningless as a BOM marker.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters below are presumably meant to be
      // NUL bytes (0x00) rather than spaces - confirm against the upstream file.
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): same question for the two trailing characters below.
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
227
   * __construct()
228
   */
229 32
  public function __construct()
  {
    // Creating an instance triggers the environment detection; the detection
    // itself is guarded by a flag, so repeated instantiation does not redo it.
    static::checkForSupport();
  }
233
234
  /**
235
   * Return the character at the specified position: $str[1] like functionality.
236
   *
237
   * @param string $str <p>A UTF-8 string.</p>
238
   * @param int    $pos <p>The position of character to return.</p>
239
   *
240
   * @return string Single Multi-Byte character.
241
   */
242 3
  public static function access(string $str, int $pos): string
  {
    // Nothing can be addressed in an empty string or before its first character.
    $isOutOfRange = ($str === '' || $pos < 0);

    return $isOutOfRange ? '' : (string)self::substr($str, $pos, 1);
  }
254
255
  /**
256
   * Prepends UTF-8 BOM character to the string and returns the whole string.
257
   *
258
   * INFO: If BOM already existed there, the Input string is returned.
259
   *
260
   * @param string $str <p>The input string.</p>
261
   *
262
   * @return string The output string that contains BOM.
263
   */
264 2
  public static function add_bom_to_string(string $str): string
  {
    // A string that already starts with a BOM is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
274
   * Adds the specified amount of left and right padding to the given string.
275
   * The default character used is a space.
276
   *
277
   * @param string $str
278
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
279
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
280
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
281
   * @param string $encoding [optional] <p>Default: UTF-8</p>
282
   *
283
   * @return string String with padding applied.
284
   */
285 25
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has the documented default ('UTF-8'): a required parameter
    // placed after optional ones is deprecated in PHP 8 and made the earlier
    // defaults unusable. Existing five-argument calls behave exactly as before.
    $strlen = self::strlen($str, $encoding);

    // The target length is the same in every case: whichever side is not
    // requested simply contributes 0.
    $length = ($left + $right) + $strlen;

    if ($left && !$right) {
      // only left padding requested
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      // only right padding requested
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
307
   * Changes all keys in an array.
308
   *
309
   * @param array $array <p>The array to work on</p>
310
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
311
   *                     or <strong>CASE_LOWER</strong> (default)</p>
312
   *
313
   * @return array An array with its keys lower- or upper-cased (values are returned unchanged).
314
   */
315 2
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Any value other than CASE_UPPER falls back to lower-casing (the default).
    $toUpper = ($case === CASE_UPPER);

    $return = [];
    foreach ($array as $key => $value) {
      // Integer keys have no letter case. Leaving them untouched yields the
      // same resulting key and avoids handing a non-string to the string
      // helpers while strict_types is enabled for this file.
      if (\is_string($key)) {
        $key = $toUpper ? self::strtoupper($key) : self::strtolower($key);
      }

      // Keys that collide after conversion overwrite each other; last one wins.
      $return[$key] = $value;
    }

    return $return;
  }
338
339
  /**
340
   * Returns the substring between $start and $end, if found, or an empty
341
   * string. An optional offset may be supplied from which to begin the
342
   * search for the start string.
343
   *
344
   * @param string $str
345
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
346
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
347
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
348
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
349
   *
350
   * @return string
351
   */
352 16
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    // Locate the opening delimiter, searching from $offset.
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // The wanted content begins right after the opening delimiter.
    $contentPos = $startPos + self::strlen($start, $encoding);

    // Locate the closing delimiter behind the opening one.
    $endPos = self::strpos($str, $end, $contentPos, $encoding);
    if ($endPos === false || $endPos === $contentPos) {
      // no closing delimiter, or nothing between the two delimiters
      return '';
    }

    $content = self::substr($str, $contentPos, $endPos - $contentPos, $encoding);

    return $content === false ? '' : $content;
  }
377
378
  /**
379
   * Convert a binary string (a sequence of "1" and "0" characters) into a string.
380
   *
381
   * @param mixed $bin 1|0
382
   *
383
   * @return string
384
   */
385 2
  public static function binary_to_str($bin): string
  {
    // Empty input (or anything without a first offset) converts to nothing.
    if (!isset($bin[0])) {
      return '';
    }

    // Binary digits -> hexadecimal digits.
    $convert = \base_convert($bin, 2, 16);
    if ($convert === '0') {
      return '';
    }

    // pack('H*') consumes two hex digits per byte and pads a dangling last
    // digit on the right, so "a" would become 0xA0 instead of 0x0A. Restore
    // the leading zero that base_convert() dropped to keep the value intact.
    if (\strlen($convert) % 2 !== 0) {
      $convert = '0' . $convert;
    }

    return \pack('H*', $convert);
  }
398
399
  /**
400
   * Returns the UTF-8 Byte Order Mark Character.
401
   *
402
   * INFO: take a look at UTF8::$BOM for e.g. the UTF-16 and UTF-32 BOM values
403
   *
404
   * @return string UTF-8 Byte Order Mark
405
   */
406 4
  public static function bom(): string
  {
    // The three bytes EF BB BF are the byte order mark encoded as UTF-8.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
410
411
  /**
412
   * @alias of UTF8::chr_map()
413
   *
414
   * @see   UTF8::chr_map()
415
   *
416
   * @param string|array $callback
417
   * @param string       $str
418
   *
419
   * @return string[]
420
   */
421 2
  public static function callback($callback, string $str): array
  {
    // Pure alias: both arguments are forwarded to chr_map() unchanged.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
427
   * Returns the character at $index, with indexes starting at 0.
428
   *
429
   * @param string $str
430
   * @param int    $index    <p>Position of the character.</p>
431
   * @param string $encoding [optional] <p>Default is UTF-8</p>
432
   *
433
   * @return string The character at $index.
434
   */
435 9
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Take exactly one character starting at $index.
    $char = self::substr($str, $index, 1, $encoding);

    // The cast guarantees the declared string return type for any non-string
    // result coming back from substr().
    return (string)$char;
  }
439
440
  /**
441
   * Returns an array consisting of the characters in the string.
442
   *
443
   * @param string $str <p>The input string.</p>
444
   *
445
   * @return string[] An array of chars.
446
   */
447 3
  public static function chars(string $str): array
  {
    // Splitting with a chunk length of 1 gives one array entry per character.
    $chunkLength = 1;

    return self::str_split($str, $chunkLength);
  }
451
452
  /**
453
   * This method will auto-detect your server environment for UTF-8 support.
454
   *
455
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
456
   */
457 37
  public static function checkForSupport()
  {
    // The detection runs only once; later calls return immediately.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    // Set the flag first, before any of the probes below are executed.
    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring - http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv - http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl - http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    // The transliterator id list stays empty unless intl is loaded and the
    // listing function actually exists.
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // IntlChar - http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype - http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo - http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json - http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // pcre (UTF-8 mode) - http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
503
   * Generates a UTF-8 encoded character from the given code point.
504
   *
505
   * INFO: opposite to UTF8::ord()
506
   *
507
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
508
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
509
   *
510
   * @return string|null Multi-Byte character, returns null on failure or empty input.
511
   */
512 16
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init: memo of already generated characters, keyed by code point + encoding
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Normalise the code point once, up front. Previously the int cast only
    // happened on the last fallback path, so a numeric string was compared,
    // used as an array key and handed to IntlChar::chr() without conversion.
    if (\is_int($code_point) === false) {
      if (\is_numeric($code_point) === false) {
        // empty or non-numeric input: documented failure value
        return null;
      }

      $code_point = (int)$code_point;
    }

    // Outside the Unicode range (U+0000 - U+10FFFF) there is nothing to encode.
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point > 0x7F && self::$SUPPORT['intlChar'] === true) {

      // Above ASCII the intl implementation is preferred when it is available.
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);

      if ($chr === null) {
        // IntlChar signals failure with null; do not pass that on to encode()
        return null;
      }

    } else {

      // Lazy-load the lookup table used to assemble the UTF-8 byte sequence.
      if (self::$CHR === null) {
        $chrTmp = self::getData('chr');
        if ($chrTmp) {
          self::$CHR = (array)$chrTmp;
        }
      }

      if ($code_point <= 0x7F) {
        // 1 byte: 0xxxxxxx
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }

    }

    // The character was built as UTF-8; convert it when another target
    // encoding was requested.
    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
601
602
  /**
603
   * Applies callback to all characters of a string.
604
   *
605
   * @param string|array $callback <p>The callback function.</p>
606
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
607
   *
608
   * @return string[] The outcome of callback.
609
   */
610 2
  public static function chr_map($callback, string $str): array
  {
    // Break the string into characters and run the callback on each of them;
    // the result has one entry per character.
    return \array_map($callback, self::split($str));
  }
616
617
  /**
618
   * Generates an array of byte length of each character of a Unicode string.
619
   *
620
   * 1 byte => U+0000  - U+007F
621
   * 2 byte => U+0080  - U+07FF
622
   * 3 byte => U+0800  - U+FFFF
623
   * 4 byte => U+10000 - U+10FFFF
624
   *
625
   * @param string $str <p>The original unicode string.</p>
626
   *
627
   * @return int[] An array of byte lengths of each character.
628
   */
629 4
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $characters = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Choose the byte counter: the class's own byte-length helper when
    // mbstring function overloading is active, the native strlen() otherwise.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteCounter = function ($data) {
        return UTF8::strlen_in_byte($data);
      };
    } else {
      $byteCounter = '\strlen';
    }

    return \array_map($byteCounter, $characters);
  }
652
653
  /**
654
   * Get a decimal code representation of a specific character.
655
   *
656
   * @param string $char <p>The input character.</p>
657
   *
658
   * @return int
659
   */
660 4
  public static function chr_to_decimal(string $char): int
  {
    // An empty string has no lead byte; answer 0 instead of reading the
    // uninitialized offset 0 (which raises a notice/warning).
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // The lead byte announces the sequence length; strip its marker bits so
    // only the payload bits remain.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // A truncated sequence contributes 0 for each missing continuation
      // byte instead of reading past the end of the string.
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;

      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
691
692
  /**
693
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
694
   *
695
   * @param string|int $char <p>The input character</p>
696
   * @param string     $pfix [optional]
697
   *
698
   * @return string The code point encoded as U+xxxx
699
   */
700 2
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    // Empty input has no code point to report.
    if ($char === '') {
      return '';
    }

    // The NUL entity is replaced by an empty string before the ordinal lookup.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
714
   * alias for "UTF8::chr_to_decimal()"
715
   *
716
   * @see UTF8::chr_to_decimal()
717
   *
718
   * @param string $chr
719
   *
720
   * @return int
721
   */
722 2
  public static function chr_to_int(string $chr): int
  {
    // Alias: the work is done by chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
728
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
729
   *
730
   * @param string $body     <p>The original string to be split.</p>
731
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
732
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
733
   *
734
   * @return string The chunked string.
735
   */
736 4
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    // Cut the text into pieces of at most $chunklen characters ...
    $chunks = self::split($body, $chunklen);

    // ... and glue them back together; implode() puts $end between the
    // chunks only, so nothing is appended after the last one.
    return \implode($end, $chunks);
  }
740
741
  /**
742
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
743
   *
744
   * @param string $str                           <p>The string to be sanitized.</p>
745
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
746
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
747
   *                                              whitespace.</p>
748
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
749
   *                                              e.g.: "…"
750
   *                                              => "..."</p>
751
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
752
   *                                              combination with
753
   *                                              $normalize_whitespace</p>
754
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
755
   *                                              mark e.g.: "�"</p>
756
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
757
   *                                              characters e.g.: "\0"</p>
758
   *
759
   * @return string Clean UTF-8 encoded string.
760
   */
761 112
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences (in batches of up to 100);
    // stray bytes land in group 2 or 3 and vanish, because only "$1" is kept.
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($validSequences, '$1', $str);

    // The optional passes below always run in this fixed order.
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
811
   * Clean up a string so that only printable UTF-8 chars remain at the end + fix the UTF-8 encoding.
812
   *
813
   * @param string $str <p>The input string.</p>
814
   *
815
   * @return string
816
   */
817 33
  public static function cleanup($str): string
  {
    // Accept any input by converting it to a string first.
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Step 1: repair ISO <-> UTF-8 mix-ups.
    $repaired = self::fix_simple_utf8($str);

    // Step 2: run clean() with the following switches (positional):
    //   remove_bom                    = true
    //   normalize_whitespace          = true
    //   normalize_msword              = false
    //   keep_non_breaking_space       = true
    //   replace_diamond_question_mark = true
    //   remove_invisible_characters   = true
    return self::clean($repaired, true, true, false, true, true, true);
  }
846
847
  /**
848
   * Accepts a string or an array of strings and returns an array of Unicode code points.
849
   *
850
   * INFO: opposite to UTF8::string()
851
   *
852
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
853
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
854
   *                                    default, code points will be returned as integers.</p>
855
   *
856
   * @return array<int|string>
857
   *                           The array of code points:<br>
858
   *                           array<int> for $u_style === false<br>
859
   *                           array<string> for $u_style === true<br>
860
   */
861 12
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A plain string is first broken into its characters; an array is used as given.
    $chars = (\is_string($arg) === true) ? self::split($arg) : $arg;

    // Translate every entry into its code point.
    $codes = \array_map([self::class, 'ord'], $chars);

    if (\count($codes) === 0) {
      return [];
    }

    if (!$u_style) {
      // integer code points requested
      return $codes;
    }

    // Hex notation requested: convert every integer via int_to_hex().
    return \array_map([self::class, 'int_to_hex'], $codes);
  }
891
892
  /**
893
   * Trims the string and replaces consecutive whitespace characters with a
894
   * single space. This includes tabs and newline characters, as well as
895
   * multibyte whitespace such as the thin space and ideographic space.
896
   *
897
   * @param string $str <p>The input string.</p>
898
   *
899
   * @return string String with a trimmed $str and condensed whitespace.
900
   */
901 13
  public static function collapse_whitespace(string $str): string
  {
    // Squeeze every run of whitespace into one ordinary space ...
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    // ... then trim the result.
    return self::trim($condensed);
  }
907
908
  /**
909
   * Returns count of characters used in a string.
910
   *
911
   * @param string $str       <p>The input string.</p>
912
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
913
   *
914
   * @return int[] An associative array of Character as keys and
915
   *               their count as values.
916
   */
917 18
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // One array entry per character (the split helper receives $cleanUtf8) ...
    $characters = self::split($str, 1, $cleanUtf8);

    // ... then tally how often each distinct character occurs.
    return \array_count_values($characters);
  }
921
922
  /**
923
   * Remove css media-queries.
924
   *
925
   * @param string $str
926
   *
927
   * @return string
928
   */
929 1
  public static function css_stripe_media_queries(string $str): string
  {
    // Matches an "@media ... { ... }" block including its nested rule block;
    // flags: m = multiline, i = case-insensitive, s = dot matches newline, U = ungreedy.
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    // preg_replace() yields null on a PCRE error; the cast turns that into ''.
    return (string)$stripped;
  }
937
938
  /**
939
   * Checks whether ctype is available on the server.
940
   *
941
   * @return bool
942
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
943
   */
944
  public static function ctype_loaded(): bool
  {
    // Ask the engine whether the "ctype" extension is loaded.
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
950
   * Converts an int value into a UTF-8 character.
951
   *
952
   * @param mixed $int
953
   *
954
   * @return string
955
   */
956 10
  public static function decimal_to_chr($int): string
  {
    // Write the number as a decimal HTML character reference ("&#NNN;") and
    // let the entity decoder produce the character.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
   * Decodes a MIME header field.
   *
   * Uses iconv_mime_decode() (continue-on-error mode) when the iconv support flag is set,
   * otherwise falls back to mb_decode_mimeheader().
   *
   * @param string $str
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    // the two well-known names are used as given, everything else gets normalized
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] === true) {
      $decoded = \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    } else {
      // fallback without iconv: re-encode the input first, then decode via mbstring
      if ($encoding != 'UTF-8') {
        $str = self::encode($encoding, $str);
      }

      $decoded = \mb_decode_mimeheader($str);
    }

    return $decoded;
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()", this function also tries to fix broken / double encoding,
   *        so it can be called on a string that is already UTF-8 without messing it up.
   *
   * Besides real charsets, the pseudo-encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled
   * for $toEncoding as well as for $fromEncoding.
   *
   * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str                    <p>The input string</p>
   * @param bool   $autodetectFromEncoding [optional] <p>If true (default), the source encoding is detected from
   *                                       the string and a detected encoding takes precedence over $fromEncoding;
   *                                       if nothing can be detected, the result of "UTF8::to_utf8()" is
   *                                       returned.<br> If false, a non-empty $fromEncoding is used as given.</p>
   * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                       An empty string will trigger the autodetect anyway.</p>
   *
   * @return string The converted string, or the unchanged input if no conversion is needed or possible.
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // nothing to do if source and target are known and identical
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    // one check is enough for the whole method (the original repeated it further down)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // --- pseudo-encodings: a "to" match returns directly, a "from" match unwraps the input
    //     and clears $fromEncoding so that the real encoding gets detected below

    if ($toEncoding === 'JSON') {
      return self::json_encode($str);
    }
    if ($fromEncoding === 'JSON') {
      // NOTE(review): this assumes the decoded JSON value is a string - TODO confirm against json_decode()
      $str = self::json_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
      return base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
      $str = base64_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'HTML-ENTITIES') {
      return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = '';
    }

    // --- source encoding detection

    $fromEncodingDetected = false;
    if ($autodetectFromEncoding === true || !$fromEncoding) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

    if ($fromEncodingDetected !== false) {
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    // --- shortcuts handled by the library itself

    if (
        $toEncoding === 'UTF-8'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1')
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8')
    ) {
      return self::to_iso8859($str);
    }

    // --- generic conversion

    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $strEncoded = \mb_convert_encoding(
          $str,
          $toEncoding,
          $fromEncoding
      );

      // accept every non-empty string result; a plain truthiness check
      // would wrongly reject the valid conversion result "0"
      if (\is_string($strEncoded) && $strEncoded !== '') {
        return $strEncoded;
      }
    }

    $return = \iconv($fromEncoding, $toEncoding, $str);
    if ($return !== false) {
      return $return;
    }

    // last resort: hand back the (possibly unwrapped) input unchanged
    return $str;
  }
1141
1142
  /**
   * Encodes a string as a MIME header field via iconv_mime_encode(), using an empty field name.
   *
   * @param string $str
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
   * @param int    $indent           [optional] <p>Set the max line length (passed as "line-length").</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // these two charset names are used as given, everything else gets normalized
    $knownCharsets = ['UTF-8', 'CP850'];

    if (\in_array($fromCharset, $knownCharsets, true) === false) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if (\in_array($toCharset, $knownCharsets, true) === false) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1185
1186
  /**
   * Creates an extract from a text. If the search-string is found, the extract is cut
   * around it so that the match ends up roughly in the middle of the output.
   *
   * Cuts are placed at the next / previous space or "." and the skipped parts are
   * marked with $replacerForSkippedText.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string. An empty string means: take the start of the
   *                                         text.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // characters that are stripped from the cut edges of the extract
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // strict comparison on purpose: empty() would also treat the search term "0" as "no search"
    if ('' === $search) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      // first space or dot at / after $end; if one of the lookups yields false
      // (presumably "not found"), min() returns false and the cast turns it into 0
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);

    // offset at which the extract would have to start to center the match
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // move the start back to the last space / dot in front of that offset
    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    // $wordPos is int|false: both "not found" (false) and "found at offset 0" take the else-branch
    if ($wordPos && $halfSide > 0) {
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      // length of the extract, measured from $pos_start to the next space / dot after $l
      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // no usable end position: take everything from $pos_start on;
        // the length is measured with the same $encoding as all other calls here
        $strSub = self::substr($str, $pos_start, $realLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1307
1308
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename             <p>
   *                                            Name of the file to read. The name is passed through
   *                                            FILTER_SANITIZE_STRING before it is used.
   *                                            </p>
   * @param bool          $use_include_path     [optional] <p>
   *                                            Whether the include path is searched for the file.
   *                                            </p>
   * @param resource|null $context              [optional] <p>
   *                                            A valid context resource created with
   *                                            stream_context_create. If null and $timeout is not 0, a context
   *                                            with the "http" timeout option is created.
   *                                            </p>
   * @param int|null      $offset               [optional] <p>
   *                                            The offset where the reading starts (null === 0).
   *                                            </p>
   * @param int|null      $maxLength            [optional] <p>
   *                                            Maximum length of data read. The default is to read until end
   *                                            of file is reached.
   *                                            </p>
   * @param int           $timeout              <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
   *                                            files, because they used non default utf-8 chars. Binary files like
   *                                            images or pdf will not be converted.</p>
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            An empty string will trigger the autodetect anyway.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // init
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites names
    //               containing quotes or tags; kept as-is to preserve the current behavior.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failure with false; stick to the documented "false on failure"
    // instead of handing a non-string to \file_get_contents() under strict_types
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length argument is only passed when it was really given
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false, $fromEncoding);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1403
1404
  /**
   * Checks if the content of a file starts with a BOM (Byte Order Mark).
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    // native PHP function here, not the UTF-8 converting wrapper of this class
    $file_content = \file_get_contents($file_path);

    if ($file_content !== false) {
      return self::string_has_bom($file_content);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1423
1424
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form that is passed to the "Normalizer" class.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (\is_object($var)) {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!\is_string($var)) {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // the steps below only apply to strings that are not reported as ASCII
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker value: keeps the isset($n[0]) check below true for already normalized input
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // no usable normalizer result: fall back to the encoding conversion
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1487
1488
  /**
1489
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1490
   *
1491
   * Gets a specific external variable by name and optionally filters it
1492
   *
1493
   * @link  http://php.net/manual/en/function.filter-input.php
1494
   *
1495
   * @param int    $type          <p>
1496
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1497
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1498
   *                              <b>INPUT_ENV</b>.
1499
   *                              </p>
1500
   * @param string $variable_name <p>
1501
   *                              Name of a variable to get.
1502
   *                              </p>
1503
   * @param int    $filter        [optional] <p>
1504
   *                              The ID of the filter to apply. The
1505
   *                              manual page lists the available filters.
1506
   *                              </p>
1507
   * @param mixed  $options       [optional] <p>
1508
   *                              Associative array of options or bitwise disjunction of flags. If filter
1509
   *                              accepts options, flags can be provided in "flags" field of array.
1510
   *                              </p>
1511
   *
1512
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1513
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1514
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1515
   */
1516
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a 4th argument,
    // so that the native default stays in effect otherwise
    $var = \func_num_args() >= 4
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    // run the native result through the normalization of this class
    return self::filter($var);
  }
1526
1527
  /**
1528
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1529
   *
1530
   * Gets external variables and optionally filters them
1531
   *
1532
   * @link  http://php.net/manual/en/function.filter-input-array.php
1533
   *
1534
   * @param int   $type       <p>
1535
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1536
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1537
   *                          <b>INPUT_ENV</b>.
1538
   *                          </p>
1539
   * @param mixed $definition [optional] <p>
1540
   *                          An array defining the arguments. A valid key is a string
1541
   *                          containing a variable name and a valid value is either a filter type, or an array
1542
   *                          optionally specifying the filter, flags and options. If the value is an
1543
   *                          array, valid keys are filter which specifies the
1544
   *                          filter type,
1545
   *                          flags which specifies any flags that apply to the
1546
   *                          filter, and options which specifies any options that
1547
   *                          apply to the filter. See the example below for a better understanding.
1548
   *                          </p>
1549
   *                          <p>
1550
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1551
   *                          input array are filtered by this filter.
1552
   *                          </p>
1553
   * @param bool  $add_empty  [optional] <p>
1554
   *                          Add missing keys as <b>NULL</b> to the return value.
1555
   *                          </p>
1556
   *
1557
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1558
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1559
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1560
   *               set and <b>NULL</b> if the filter fails.
1561
   */
1562
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // the optional arguments are only forwarded when at least $definition was passed,
    // so that the native defaults stay in effect otherwise
    $a = \func_num_args() >= 2
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    // run the native result through the normalization of this class
    return self::filter($a);
  }
1572
1573
  /**
1574
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1575
   *
1576
   * Filters a variable with a specified filter
1577
   *
1578
   * @link  http://php.net/manual/en/function.filter-var.php
1579
   *
1580
   * @param mixed $variable <p>
1581
   *                        Value to filter.
1582
   *                        </p>
1583
   * @param int   $filter   [optional] <p>
1584
   *                        The ID of the filter to apply. The
1585
   *                        manual page lists the available filters.
1586
   *                        </p>
1587
   * @param mixed $options  [optional] <p>
1588
   *                        Associative array of options or bitwise disjunction of flags. If filter
1589
   *                        accepts options, flags can be provided in "flags" field of array. For
1590
   *                        the "callback" filter, callable type should be passed. The
1591
   *                        callback must accept one argument, the value to be filtered, and return
1592
   *                        the value after filtering/sanitizing it.
1593
   *                        </p>
1594
   *                        <p>
1595
   *                        <code>
1596
   *                        // for filters that accept options, use this format
1597
   *                        $options = array(
1598
   *                        'options' => array(
1599
   *                        'default' => 3, // value to return if the filter fails
1600
   *                        // other options here
1601
   *                        'min_range' => 0
1602
   *                        ),
1603
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1604
   *                        );
1605
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1606
   *                        // for filter that only accept flags, you can pass them directly
1607
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1608
   *                        // for filter that only accept flags, you can also pass as an array
1609
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1610
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1611
   *                        // callback validate filter
1612
   *                        function foo($value)
1613
   *                        {
1614
   *                        // Expected format: Surname, GivenNames
1615
   *                        if (strpos($value, ", ") === false) return false;
1616
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1617
   *                        $empty = (empty($surname) || empty($givennames));
1618
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1619
   *                        if ($empty || $notstrings) {
1620
   *                        return false;
1621
   *                        } else {
1622
   *                        return $value;
1623
   *                        }
1624
   *                        }
1625
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1626
   *                        </code>
1627
   *                        </p>
1628
   *
1629
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1630
   */
1631 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a 3rd argument,
    // so that the native default stays in effect otherwise
    $filtered = \func_num_args() >= 3
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    // run the native result through the normalization of this class
    return self::filter($filtered);
  }
1641
1642
  /**
1643
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1644
   *
1645
   * Gets multiple variables and optionally filters them
1646
   *
1647
   * @link  http://php.net/manual/en/function.filter-var-array.php
1648
   *
1649
   * @param array $data       <p>
1650
   *                          An array with string keys containing the data to filter.
1651
   *                          </p>
1652
   * @param mixed $definition [optional] <p>
1653
   *                          An array defining the arguments. A valid key is a string
1654
   *                          containing a variable name and a valid value is either a
1655
   *                          filter type, or an
1656
   *                          array optionally specifying the filter, flags and options.
1657
   *                          If the value is an array, valid keys are filter
1658
   *                          which specifies the filter type,
1659
   *                          flags which specifies any flags that apply to the
1660
   *                          filter, and options which specifies any options that
1661
   *                          apply to the filter. See the example below for a better understanding.
1662
   *                          </p>
1663
   *                          <p>
1664
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1665
   *                          input array are filtered by this filter.
1666
   *                          </p>
1667
   * @param bool  $add_empty  [optional] <p>
1668
   *                          Add missing keys as <b>NULL</b> to the return value.
1669
   *                          </p>
1670
   *
1671
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1672
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1673
   */
1674 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // the optional arguments are only forwarded when at least $definition was passed,
    // so that the native defaults stay in effect otherwise
    $a = \func_num_args() >= 2
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    // run the native result through the normalization of this class
    return self::filter($a);
  }
1684
1685
  /**
1686
   * Checks whether finfo is available on the server.
1687
   *
1688
   * @return bool
1689
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1690
   */
1691
  public static function finfo_loaded(): bool
  {
    // "finfo" counts as available as soon as a class with that name exists.
    $finfoClassExists = \class_exists('finfo');

    return $finfoClassExists;
  }
1695
1696
  /**
1697
   * Returns the first $n characters of the string.
1698
   *
1699
   * @param string $str      <p>The input string.</p>
1700
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1701
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1702
   *
1703
   * @return string
1704
   */
1705 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A non-positive count can never yield any characters.
    if ($n < 1) {
      return '';
    }

    $prefix = self::substr($str, 0, $n, $encoding);

    // A false result from self::substr() is normalized to an empty string.
    return ($prefix === false) ? '' : $prefix;
  }
1718
1719
  /**
1720
   * Check if the number of unicode characters is not more than the specified integer.
1721
   *
1722
   * @param string $str      The original string to be checked.
1723
   * @param int    $box_size The size in number of chars to be checked against string.
1724
   *
1725
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1726
   */
1727 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Compare the character count (as reported by self::strlen()) against the box size.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1731
1732
  /**
1733
   * @param string $str
1734
   * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
1735
   * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
1736
   *
1737
   * @return string
1738
   */
1739 54
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    // Only a strict boolean true switches the conversion towards lowercase.
    $toLower = ($useLower === true);

    // Step 1: common case folding - pick the search/replace direction once.
    if ($toLower) {
      $search = self::$COMMON_CASE_FOLD['upper'];
      $replace = self::$COMMON_CASE_FOLD['lower'];
    } else {
      $search = self::$COMMON_CASE_FOLD['lower'];
      $replace = self::$COMMON_CASE_FOLD['upper'];
    }

    $str = (string)\str_replace($search, $replace, $str);

    if (!$fullCaseFold) {
      return $str;
    }

    // Step 2: full case folding - the table is loaded once and kept for later calls.
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // Index 0 is the search side when lowering, index 1 when uppering.
    $fromIndex = $toLower ? 0 : 1;
    $toIndex = $toLower ? 1 : 0;

    return (string)\str_replace($FULL_CASE_FOLD[$fromIndex], $FULL_CASE_FOLD[$toIndex], $str);
  }
1774
1775
  /**
1776
   * Try to fix simple broken UTF-8 strings.
1777
   *
1778
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1779
   *
1780
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1781
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1782
   * See: http://en.wikipedia.org/wiki/Windows-1252
1783
   *
1784
   * @param string $str <p>The input string</p>
1785
   *
1786
   * @return string
1787
   */
1788 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replace lists derived from the fix table, built once per process.
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // self::getData() returns false when the data file is missing, so only
        // store the result if it really is an array (the property is null|array).
        $fixMap = self::getData('utf8_fix');
        if (\is_array($fixMap)) {
          self::$BROKEN_UTF8_FIX = $fixMap;
        }
      }

      // Without a usable table there is nothing to replace: return the input
      // untouched instead of passing a non-array to array_keys() / array_values().
      if (!\is_array(self::$BROKEN_UTF8_FIX)) {
        return $str;
      }

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1809
1810
  /**
1811
   * Fix a double (or multiple) encoded UTF8 string.
1812
   *
1813
   * @param string[]|string $str You can use a string or an array of strings.
1814
   *
1815
   * @return string[]|string
1816
   *                          Will return the fixed input-"array" or
1817
   *                          the fixed input-"string".
1818
   */
1819 2
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively); keys are preserved.
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    $current = (string)$str;
    $previous = '';

    // Decode and re-encode until a pass no longer changes the string,
    // which peels off one layer of encoding per iteration.
    while ($previous !== $current) {
      $previous = $current;
      $current = self::to_utf8(
          self::utf8_decode($previous, true)
      );
    }

    return $current;
  }
1840
1841
  /**
1842
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1843
   *
1844
   * @param string $char
1845
   *
1846
   * @return string 'RTL' or 'LTR'
1847
   */
1848 2
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }

      // Any other code falls through to the code point tables below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside 0x5be..0x10b7f is treated as left-to-right.
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    // Select the table that applies to the block the code point lies in:
    // $exact holds single code points, $ranges holds inclusive [from, to] pairs.
    if (0x85e >= $c) {

      $exact = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $ranges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $exact = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $ranges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // Between the two tables only 0x200f (handled above) is right-to-left.
      return 'LTR';

    }

    if (\in_array($c, $exact, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1963
1964
  /**
1965
   * Get data from "/data/*.php".
1966
   *
1967
   * @param string $file
1968
   *
1969
   * @return mixed|false Will return false on error.
1970
   */
1971 13
  private static function getData(string $file)
  {
    // Data files are looked up in the "data" directory next to this class, with a ".php" suffix.
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    // The value returned by the included file is handed back unchanged.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1981
1982
  /**
1983
   * Check for php-support.
1984
   *
1985
   * @param string|null $key
1986
   *
1987
   * @return mixed
1988
   *               Return the full support-"array", if $key === null<br>
1989
   *               return bool-value, if $key is used and available<br>
1990
   *               otherwise return <strong>null</strong>.
1991
   */
1992 25
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key requested: hand back the complete support array.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) entries yield null.
    return self::$SUPPORT[$key] ?? null;
  }
2008
2009
  /**
2010
   * @param string $str
2011
   * @param array  $fallback with this keys: 'ext', 'mime', 'type'
2012
   *
2013
   * @return array
2014
   *               with this keys: 'ext', 'mime', 'type'
2015
   */
2016 40
  private static function get_file_type(
      string $str,
      array $fallback = [
          'ext'  => null,
          'mime' => 'application/octet-stream',
          'type' => null,
      ]
  ): array
  {
    if ($str === '') {
      return $fallback;
    }

    // Only the first two bytes are inspected.
    $magic = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($magic) !== 2) {
      return $fallback;
    }

    // Known signatures, keyed by the raw two leading bytes.
    //
    // The lookup is done on the bytes themselves: concatenating the two byte
    // values as decimal numbers is ambiguous (e.g. 0x07 0xAD => "7" . "173"
    // => 7173, the same number as "GI" => "71" . "73"), which made arbitrary
    // binary data look like a GIF.
    $signatures = [
        '%P'       => ['pdf', 'application/pdf'],          // 37 80
        'MZ'       => ['exe', 'application/octet-stream'], // 77 90
        'MT'       => ['midi', 'audio/x-midi'],            // 77 84
        'PK'       => ['zip', 'application/zip'],          // 80 75
        'Ra'       => ['rar', 'application/rar'],          // 82 97
        "\xFF\xD8" => ['jpg', 'image/jpeg'],               // 255 216
        'GI'       => ['gif', 'image/gif'],                // 71 73
        'BM'       => ['bmp', 'image/bmp'],                // 66 77
        "\x89P"    => ['png', 'image/png'],                // 137 80
    ];

    if (!isset($signatures[$magic])) {
      return $fallback;
    }

    return [
        'ext'  => $signatures[$magic][0],
        'mime' => $signatures[$magic][1],
        'type' => 'binary',
    ];
  }
2105
2106
  /**
2107
   * @param int    $length        <p>Length of the random string.</p>
2108
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2109
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2110
   *
2111
   * @return string
2112
   */
2113 1
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    // Number of characters that can be picked from.
    $poolSize = self::strlen($possibleChars, $encoding);

    // An empty pool cannot produce any output.
    if ($poolSize === 0) {
      return '';
    }

    $lastIndex = $poolSize - 1;
    $result = '';

    // Append one randomly selected character per iteration.
    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $index = \random_int(0, $lastIndex);
      } catch (\Exception $e) {
        // random_int() failed with an exception: fall back to mt_rand().
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $lastIndex);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
2139
2140
  /**
2141
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2142
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2143
   *
2144
   * @return string
2145
   */
2146 1
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // Request-specific values; a missing entry contributes an empty string.
    $remoteAddr = $_SERVER['REMOTE_ADDR'] ?? '';
    $serverAddr = $_SERVER['SERVER_ADDR'] ?? '';

    // Seed: random number + session id + addresses + caller-supplied entropy.
    $seed = \mt_rand() . \session_id() . $remoteAddr . $serverAddr . $entropyExtra;

    $unique = \uniqid($seed, true);

    // Optionally condense the identifier into an md5 hash.
    return $md5 ? \md5($unique . $seed) : $unique;
  }
2162
2163
  /**
2164
   * alias for "UTF8::string_has_bom()"
2165
   *
2166
   * @see        UTF8::string_has_bom()
2167
   *
2168
   * @param string $str
2169
   *
2170
   * @return bool
2171
   *
2172
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2173
   */
2174 2
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: the check itself is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2178
2179
  /**
2180
   * Returns true if the string contains a lower case char, false otherwise.
2181
   *
2182
   * @param string $str <p>The input string.</p>
2183
   *
2184
   * @return bool Whether or not the string contains a lower case character.
2185
   */
2186 47
  public static function has_lowercase(string $str): bool
  {
    // Any prefix followed by at least one character of the POSIX "lower" class.
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2190
2191
  /**
2192
   * Returns true if the string contains an upper case char, false otherwise.
2193
   *
2194
   * @param string $str <p>The input string.</p>
2195
   *
2196
   * @return bool Whether or not the string contains an upper case character.
2197
   */
2198 12
  public static function has_uppercase(string $str): bool
  {
    // Any prefix followed by at least one character of the POSIX "upper" class.
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2202
2203
  /**
2204
   * Converts a hexadecimal-value into an UTF-8 character.
2205
   *
2206
   * @param string $hexdec <p>The hexadecimal value.</p>
2207
   *
2208
   * @return string|false One single UTF-8 character.
2209
   */
2210 4
  public static function hex_to_chr(string $hexdec)
  {
    // Hexadecimal string -> decimal code point -> character.
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2214
2215
  /**
2216
   * Converts hexadecimal U+xxxx code point representation to integer.
2217
   *
2218
   * INFO: opposite to UTF8::int_to_hex()
2219
   *
2220
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2221
   *
2222
   * @return int|false The code point, or false on failure.
2223
   */
2224 2
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
    // Only real hex digits are accepted: letters outside a-f used to pass the
    // pattern and were then silently turned into 0 by intval() instead of
    // being reported as a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2239
2240
  /**
2241
   * alias for "UTF8::html_entity_decode()"
2242
   *
2243
   * @see UTF8::html_entity_decode()
2244
   *
2245
   * @param string $str
2246
   * @param int    $flags
2247
   * @param string $encoding
2248
   *
2249
   * @return string
2250
   */
2251 4
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Pure alias: all arguments are forwarded unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2255
2256
  /**
2257
   * Converts a UTF-8 string to a series of HTML numbered entities.
2258
   *
2259
   * INFO: opposite to UTF8::html_decode()
2260
   *
2261
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2262
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2263
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2264
   *
2265
   * @return string HTML numbered entities.
2266
   */
2267 13
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // With $keepAsciiChars only code points from 0x80 upwards are encoded.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      return \mb_encode_numericentity(
          $str,
          // One convmap entry is exactly four integers: start, end, offset, mask.
          // A trailing fifth element is ignored by PHP 7 but rejected by PHP 8
          // ("must have a multiple of 4 elements").
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2309
2310
  /**
2311
   * UTF-8 version of html_entity_decode()
2312
   *
2313
   * The reason we are not using html_entity_decode() by itself is because
2314
   * while it is not technically correct to leave out the semicolon
2315
   * at the end of an entity most browsers will still interpret the entity
2316
   * correctly. html_entity_decode() does not convert entities without
2317
   * semicolons, so we are left with our own little solution here. Bummer.
2318
   *
2319
   * Convert all HTML entities to their applicable characters
2320
   *
2321
   * INFO: opposite to UTF8::html_encode()
2322
   *
2323
   * @link http://php.net/manual/en/function.html-entity-decode.php
2324
   *
2325
   * @param string $str      <p>
2326
   *                         The input string.
2327
   *                         </p>
2328
   * @param int    $flags    [optional] <p>
2329
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2330
   *                         which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
2331
   *                         <table>
2332
   *                         Available <i>flags</i> constants
2333
   *                         <tr valign="top">
2334
   *                         <td>Constant Name</td>
2335
   *                         <td>Description</td>
2336
   *                         </tr>
2337
   *                         <tr valign="top">
2338
   *                         <td><b>ENT_COMPAT</b></td>
2339
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2340
   *                         </tr>
2341
   *                         <tr valign="top">
2342
   *                         <td><b>ENT_QUOTES</b></td>
2343
   *                         <td>Will convert both double and single quotes.</td>
2344
   *                         </tr>
2345
   *                         <tr valign="top">
2346
   *                         <td><b>ENT_NOQUOTES</b></td>
2347
   *                         <td>Will leave both double and single quotes unconverted.</td>
2348
   *                         </tr>
2349
   *                         <tr valign="top">
2350
   *                         <td><b>ENT_HTML401</b></td>
2351
   *                         <td>
2352
   *                         Handle code as HTML 4.01.
2353
   *                         </td>
2354
   *                         </tr>
2355
   *                         <tr valign="top">
2356
   *                         <td><b>ENT_XML1</b></td>
2357
   *                         <td>
2358
   *                         Handle code as XML 1.
2359
   *                         </td>
2360
   *                         </tr>
2361
   *                         <tr valign="top">
2362
   *                         <td><b>ENT_XHTML</b></td>
2363
   *                         <td>
2364
   *                         Handle code as XHTML.
2365
   *                         </td>
2366
   *                         </tr>
2367
   *                         <tr valign="top">
2368
   *                         <td><b>ENT_HTML5</b></td>
2369
   *                         <td>
2370
   *                         Handle code as HTML 5.
2371
   *                         </td>
2372
   *                         </tr>
2373
   *                         </table>
2374
   *                         </p>
2375
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2376
   *
2377
   * @return string The decoded string.
2378
   */
2379 40
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Nothing to decode without "&", or without both "&#" and ";".
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // The support information must be initialized before
    // self::$SUPPORT['mbstring'] is read for the first time below.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Repeat until a pass no longer changes the string, so that
    // multiple-encoded entities are decoded completely.
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        $str = \mb_decode_numericentity(
            $str,
            // One convmap entry is exactly four integers: start, end, offset, mask.
            // A trailing fifth element is ignored by PHP 7 but rejected by PHP 8
            // ("must have a multiple of 4 elements").
            [0x80, 0xfffff, 0, 0xfffff],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // Quotes are left encoded here; the html_entity_decode() call
              // below handles them according to $flags.
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (a missing trailing ";" is added to numeric entities first)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2467
2468
  /**
2469
   * Create a escape html version of the string via "UTF8::htmlspecialchars()".
2470
   *
2471
   * @param string $str
2472
   * @param string $encoding [optional] <p>Default: UTF-8</p>
2473
   *
2474
   * @return string
2475
   */
2476 6
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    // Escape both quote styles and substitute invalid code unit sequences.
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2484
2485
  /**
2486
   * Remove empty html-tag.
2487
   *
2488
   * e.g.: <tag></tag>
2489
   *
2490
   * @param string $str
2491
   *
2492
   * @return string
2493
   */
2494 1
  public static function html_stripe_empty_tags(string $str): string
  {
    // An opening tag, optional whitespace only, then a closing tag.
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    // preg_replace() yields null on error; the cast turns that into ''.
    return (string)$stripped;
  }
2502
2503
  /**
2504
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2505
   *
2506
   * @link http://php.net/manual/en/function.htmlentities.php
2507
   *
2508
   * @param string $str           <p>
2509
   *                              The input string.
2510
   *                              </p>
2511
   * @param int    $flags         [optional] <p>
2512
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2513
   *                              invalid code unit sequences and the used document type. The default is
2514
   *                              ENT_COMPAT | ENT_HTML401.
2515
   *                              <table>
2516
   *                              Available <i>flags</i> constants
2517
   *                              <tr valign="top">
2518
   *                              <td>Constant Name</td>
2519
   *                              <td>Description</td>
2520
   *                              </tr>
2521
   *                              <tr valign="top">
2522
   *                              <td><b>ENT_COMPAT</b></td>
2523
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2524
   *                              </tr>
2525
   *                              <tr valign="top">
2526
   *                              <td><b>ENT_QUOTES</b></td>
2527
   *                              <td>Will convert both double and single quotes.</td>
2528
   *                              </tr>
2529
   *                              <tr valign="top">
2530
   *                              <td><b>ENT_NOQUOTES</b></td>
2531
   *                              <td>Will leave both double and single quotes unconverted.</td>
2532
   *                              </tr>
2533
   *                              <tr valign="top">
2534
   *                              <td><b>ENT_IGNORE</b></td>
2535
   *                              <td>
2536
   *                              Silently discard invalid code unit sequences instead of returning
2537
   *                              an empty string. Using this flag is discouraged as it
2538
   *                              may have security implications.
2539
   *                              </td>
2540
   *                              </tr>
2541
   *                              <tr valign="top">
2542
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2543
   *                              <td>
2544
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2545
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2546
   *                              </td>
2547
   *                              </tr>
2548
   *                              <tr valign="top">
2549
   *                              <td><b>ENT_DISALLOWED</b></td>
2550
   *                              <td>
2551
   *                              Replace invalid code points for the given document type with a
2552
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2553
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2554
   *                              instance, to ensure the well-formedness of XML documents with
2555
   *                              embedded external content.
2556
   *                              </td>
2557
   *                              </tr>
2558
   *                              <tr valign="top">
2559
   *                              <td><b>ENT_HTML401</b></td>
2560
   *                              <td>
2561
   *                              Handle code as HTML 4.01.
2562
   *                              </td>
2563
   *                              </tr>
2564
   *                              <tr valign="top">
2565
   *                              <td><b>ENT_XML1</b></td>
2566
   *                              <td>
2567
   *                              Handle code as XML 1.
2568
   *                              </td>
2569
   *                              </tr>
2570
   *                              <tr valign="top">
2571
   *                              <td><b>ENT_XHTML</b></td>
2572
   *                              <td>
2573
   *                              Handle code as XHTML.
2574
   *                              </td>
2575
   *                              </tr>
2576
   *                              <tr valign="top">
2577
   *                              <td><b>ENT_HTML5</b></td>
2578
   *                              <td>
2579
   *                              Handle code as HTML 5.
2580
   *                              </td>
2581
   *                              </tr>
2582
   *                              </table>
2583
   *                              </p>
2584
   * @param string $encoding      [optional] <p>
2585
   *                              Like <b>htmlspecialchars</b>,
2586
   *                              <b>htmlentities</b> takes an optional third argument
2587
   *                              <i>encoding</i> which defines encoding used in
2588
   *                              conversion.
2589
   *                              Although this argument is technically optional, you are highly
2590
   *                              encouraged to specify the correct value for your code.
2591
   *                              </p>
2592
   * @param bool   $double_encode [optional] <p>
2593
   *                              When <i>double_encode</i> is turned off PHP will not
2594
   *                              encode existing html entities. The default is to convert everything.
2595
   *                              </p>
2596
   *
2597
   *
2598
   * @return string The encoded string.
2599
   * </p>
2600
   * <p>
2601
   * If the input <i>string</i> contains an invalid code unit
2602
   * sequence within the given <i>encoding</i> an empty string
2603
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2604
   * <b>ENT_SUBSTITUTE</b> flags are set.
2605
   */
2606 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given; every other encoding name is
    // first passed through self::normalize_encoding() (second argument "UTF-8").
    $isPassThroughEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isPassThroughEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Step 1: let the native function convert everything it knows about.
    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    // Step 2: the native function leaves backslashes untouched, so they are
    // replaced with their numeric entity here.
    //
    // Idea taken from:
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // Step 3: hand the result to self::html_encode() with the same encoding.
    return self::html_encode($encoded, true, $encoding);
  }
2626
2627
  /**
2628
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2629
   *
2630
   * INFO: Take a look at "UTF8::htmlentities()"
2631
   *
2632
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2633
   *
2634
   * @param string $str           <p>
2635
   *                              The string being converted.
2636
   *                              </p>
2637
   * @param int    $flags         [optional] <p>
2638
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2639
   *                              invalid code unit sequences and the used document type. The default is
2640
   *                              ENT_COMPAT | ENT_HTML401.
2641
   *                              <table>
2642
   *                              Available <i>flags</i> constants
2643
   *                              <tr valign="top">
2644
   *                              <td>Constant Name</td>
2645
   *                              <td>Description</td>
2646
   *                              </tr>
2647
   *                              <tr valign="top">
2648
   *                              <td><b>ENT_COMPAT</b></td>
2649
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2650
   *                              </tr>
2651
   *                              <tr valign="top">
2652
   *                              <td><b>ENT_QUOTES</b></td>
2653
   *                              <td>Will convert both double and single quotes.</td>
2654
   *                              </tr>
2655
   *                              <tr valign="top">
2656
   *                              <td><b>ENT_NOQUOTES</b></td>
2657
   *                              <td>Will leave both double and single quotes unconverted.</td>
2658
   *                              </tr>
2659
   *                              <tr valign="top">
2660
   *                              <td><b>ENT_IGNORE</b></td>
2661
   *                              <td>
2662
   *                              Silently discard invalid code unit sequences instead of returning
2663
   *                              an empty string. Using this flag is discouraged as it
2664
   *                              may have security implications.
2665
   *                              </td>
2666
   *                              </tr>
2667
   *                              <tr valign="top">
2668
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2669
   *                              <td>
2670
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2671
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2672
   *                              </td>
2673
   *                              </tr>
2674
   *                              <tr valign="top">
2675
   *                              <td><b>ENT_DISALLOWED</b></td>
2676
   *                              <td>
2677
   *                              Replace invalid code points for the given document type with a
2678
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2679
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2680
   *                              instance, to ensure the well-formedness of XML documents with
2681
   *                              embedded external content.
2682
   *                              </td>
2683
   *                              </tr>
2684
   *                              <tr valign="top">
2685
   *                              <td><b>ENT_HTML401</b></td>
2686
   *                              <td>
2687
   *                              Handle code as HTML 4.01.
2688
   *                              </td>
2689
   *                              </tr>
2690
   *                              <tr valign="top">
2691
   *                              <td><b>ENT_XML1</b></td>
2692
   *                              <td>
2693
   *                              Handle code as XML 1.
2694
   *                              </td>
2695
   *                              </tr>
2696
   *                              <tr valign="top">
2697
   *                              <td><b>ENT_XHTML</b></td>
2698
   *                              <td>
2699
   *                              Handle code as XHTML.
2700
   *                              </td>
2701
   *                              </tr>
2702
   *                              <tr valign="top">
2703
   *                              <td><b>ENT_HTML5</b></td>
2704
   *                              <td>
2705
   *                              Handle code as HTML 5.
2706
   *                              </td>
2707
   *                              </tr>
2708
   *                              </table>
2709
   *                              </p>
2710
   * @param string $encoding      [optional] <p>
2711
   *                              Defines encoding used in conversion.
2712
   *                              </p>
2713
   *                              <p>
2714
   *                              For the purposes of this function, the encodings
2715
   *                              ISO-8859-1, ISO-8859-15,
2716
   *                              UTF-8, cp866,
2717
   *                              cp1251, cp1252, and
2718
   *                              KOI8-R are effectively equivalent, provided the
2719
   *                              <i>string</i> itself is valid for the encoding, as
2720
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2721
   *                              the same positions in all of these encodings.
2722
   *                              </p>
2723
   * @param bool   $double_encode [optional] <p>
2724
   *                              When <i>double_encode</i> is turned off PHP will not
2725
   *                              encode existing html entities, the default is to convert everything.
2726
   *                              </p>
2727
   *
2728
   * @return string The converted string.
2729
   * </p>
2730
   * <p>
2731
   * If the input <i>string</i> contains an invalid code unit
2732
   * sequence within the given <i>encoding</i> an empty string
2733
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2734
   * <b>ENT_SUBSTITUTE</b> flags are set.
2735
   */
2736 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given; every other encoding name is
    // first passed through self::normalize_encoding() (second argument "UTF-8").
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Thin wrapper: the conversion itself is done by the native function.
    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2744
2745
  /**
2746
   * Checks whether iconv is available on the server.
2747
   *
2748
   * @return bool
2749
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2750
   */
2751
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so it can be returned directly.
    $hasIconv = \extension_loaded('iconv');

    return $hasIconv;
  }
2755
2756
  /**
2757
   * alias for "UTF8::decimal_to_chr()"
2758
   *
2759
   * @see UTF8::decimal_to_chr()
2760
   *
2761
   * @param mixed $int
2762
   *
2763
   * @return string
2764
   */
2765 4
  public static function int_to_chr($int): string
  {
    // Pure alias: the argument is forwarded unchanged to self::decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2769
2770
  /**
2771
   * Converts Integer to hexadecimal U+xxxx code point representation.
2772
   *
2773
   * INFO: opposite to UTF8::hex_to_int()
2774
   *
2775
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2776
   * @param string $pfix [optional]
2777
   *
2778
   * @return string The prefix followed by the hexadecimal code point (zero-padded to at least four digits).
2779
   */
2780 6
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Lower-case hexadecimal digits, left-padded with zeros to at least four
    // characters (longer values are kept as they are).
    $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
2788
2789
  /**
2790
   * Checks whether intl-char is available on the server.
2791
   *
2792
   * @return bool
2793
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2794
   */
2795
  public static function intlChar_loaded(): bool
  {
    // The check is done via the existence of the "IntlChar" class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2799
2800
  /**
2801
   * Checks whether intl is available on the server.
2802
   *
2803
   * @return bool
2804
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2805
   */
2806 5
  public static function intl_loaded(): bool
  {
    // The check is done via the loaded-extension list ("intl").
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2810
2811
  /**
2812
   * alias for "UTF8::is_ascii()"
2813
   *
2814
   * @see        UTF8::is_ascii()
2815
   *
2816
   * @param string $str
2817
   *
2818
   * @return bool
2819
   *
2820
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2821
   */
2822 2
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2826
2827
  /**
2828
   * alias for "UTF8::is_base64()"
2829
   *
2830
   * @see        UTF8::is_base64()
2831
   *
2832
   * @param string $str
2833
   *
2834
   * @return bool
2835
   *
2836
   * @deprecated <p>use "UTF8::is_base64()"</p>
2837
   */
2838 2
  public static function isBase64($str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2842
2843
  /**
2844
   * alias for "UTF8::is_binary()"
2845
   *
2846
   * @see        UTF8::is_binary()
2847
   *
2848
   * @param mixed $str
2849
   * @param bool  $strict
2850
   *
2851
   * @return bool
2852
   *
2853
   * @deprecated <p>use "UTF8::is_binary()"</p>
2854
   */
2855 4
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged
    // to self::is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
  }
2859
2860
  /**
2861
   * alias for "UTF8::is_bom()"
2862
   *
2863
   * @see        UTF8::is_bom()
2864
   *
2865
   * @param string $utf8_chr
2866
   *
2867
   * @return bool
2868
   *
2869
   * @deprecated <p>use "UTF8::is_bom()"</p>
2870
   */
2871 2
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2875
2876
  /**
2877
   * alias for "UTF8::is_html()"
2878
   *
2879
   * @see        UTF8::is_html()
2880
   *
2881
   * @param string $str
2882
   *
2883
   * @return bool
2884
   *
2885
   * @deprecated <p>use "UTF8::is_html()"</p>
2886
   */
2887 2
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_html().
    $result = self::is_html($str);

    return $result;
  }
2891
2892
  /**
2893
   * alias for "UTF8::is_json()"
2894
   *
2895
   * @see        UTF8::is_json()
2896
   *
2897
   * @param string $str
2898
   *
2899
   * @return bool
2900
   *
2901
   * @deprecated <p>use "UTF8::is_json()"</p>
2902
   */
2903
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_json().
    $result = self::is_json($str);

    return $result;
  }
2907
2908
  /**
2909
   * alias for "UTF8::is_utf16()"
2910
   *
2911
   * @see        UTF8::is_utf16()
2912
   *
2913
   * @param mixed $str
2914
   *
2915
   * @return int|false
2916
   *                    <strong>false</strong> if it's not UTF-16,<br>
2917
   *                    <strong>1</strong> for UTF-16LE,<br>
2918
   *                    <strong>2</strong> for UTF-16BE.
2919
   *
2920
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2921
   */
2922 2
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_utf16()
    // and hands back whatever that method returns.
    $result = self::is_utf16($str);

    return $result;
  }
2926
2927
  /**
2928
   * alias for "UTF8::is_utf32()"
2929
   *
2930
   * @see        UTF8::is_utf32()
2931
   *
2932
   * @param mixed $str
2933
   *
2934
   * @return int|false
2935
   *                   <strong>false</strong> if it's not UTF-32,
2936
   *                   <strong>1</strong> for UTF-32LE,
2937
   *                   <strong>2</strong> for UTF-32BE.
2938
   *
2939
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2940
   */
2941 2
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards unchanged to self::is_utf32()
    // and hands back whatever that method returns.
    $result = self::is_utf32($str);

    return $result;
  }
2945
2946
  /**
2947
   * alias for "UTF8::is_utf8()"
2948
   *
2949
   * @see        UTF8::is_utf8()
2950
   *
2951
   * @param string $str
2952
   * @param bool   $strict
2953
   *
2954
   * @return bool
2955
   *
2956
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2957
   */
2958 17
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged
    // to self::is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2962
2963
  /**
2964
   * Returns true if the string contains only alphabetic chars, false otherwise.
2965
   *
2966
   * @param string $str
2967
   *
2968
   * @return bool
2969
   *               Whether or not $str contains only alphabetic chars.
2970
   */
2971 10
  public static function is_alpha(string $str): bool
  {
    // Anchored pattern built from the POSIX "alpha" class; the "*" quantifier
    // means the pattern itself also accepts zero characters.
    $onlyAlphaPattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $onlyAlphaPattern);
  }
2975
2976
  /**
2977
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2978
   *
2979
   * @param string $str
2980
   *
2981
   * @return bool
2982
   *               Whether or not $str contains only alphanumeric chars.
2983
   */
2984 13
  public static function is_alphanumeric(string $str): bool
  {
    // Anchored pattern built from the POSIX "alnum" class; the "*" quantifier
    // means the pattern itself also accepts zero characters.
    $onlyAlnumPattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $onlyAlnumPattern);
  }
2988
2989
  /**
2990
   * Checks if a string is 7 bit ASCII.
2991
   *
2992
   * @param string $str <p>The string to check.</p>
2993
   *
2994
   * @return bool
2995
   *              <strong>true</strong> if it is ASCII<br>
2996
   *              <strong>false</strong> otherwise
2997
   *
2998
   */
2999 198
  public static function is_ascii(string $str): bool
  {
    // The empty string is accepted without running the regex.
    if ($str === '') {
      return true;
    }

    // Accepted bytes: \x09, \x0A, \x0D, \x10, \x13 and the range \x20-\x7E.
    // Any other byte makes the string "not ASCII" for this method.
    // NOTE(review): \x10 and \x13 are in the accepted list while other control
    // bytes (e.g. \x00, \x7F) are not -- confirm this is intentional.
    $foundOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOtherByte;
  }
3007
3008
  /**
3009
   * Returns true if the string is base64 encoded, false otherwise.
3010
   *
3011
   * @param string $str <p>The input string.</p>
3012
   *
3013
   * @return bool Whether or not $str is base64 encoded.
3014
   */
3015 9
  public static function is_base64($str): bool
  {
    // The parameter is untyped, so anything may arrive here: only non-empty
    // strings can be base64 payloads.
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false as soon as the input
    // contains a character from outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Round-trip check. The decoded value is deliberately NOT tested for
    // truthiness: a payload that decodes to the string "0" (e.g. "MA==") is
    // valid base64, but "0" is falsy in PHP and was wrongly rejected before.
    // An empty decode result still fails here because $str is non-empty.
    return \base64_encode($decoded) === $str;
  }
3029
3030
  /**
3031
   * Check if the input is binary... (this looks like a hack).
3032
   *
3033
   * @param mixed $input
3034
   * @param bool  $strict
3035
   *
3036
   * @return bool
3037
   */
3038 40
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;

    // An empty string is never reported as binary.
    if ($input === '') {
      return false;
    }

    // A string made up solely of "0" and "1" characters counts as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Strict mode additionally asks ext-fileinfo for the MIME encoding.
    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($input);
      if ($detectedEncoding === 'binary') {
        return true;
      }
    }

    // Next check: the "type" entry reported by self::get_file_type().
    $fileInfo = self::get_file_type($input);
    if ($fileInfo['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($input);
    if (!$byteLength) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Last heuristic: the share of NUL bytes in the input. More than 25.6 %
    // is treated as binary.
    $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);

    return ($nullByteCount / $byteLength) > 0.256;
  }
3087
3088
  /**
3089
   * Check if the file is binary.
3090
   *
3091
   * @param string $file
3092
   *
3093
   * @return bool
3094
   */
3095 6
  public static function is_binary_file($file): bool
  {
    // Stays empty when the file cannot be opened.
    $firstBytes = '';

    // Only the first 512 bytes of the file are inspected.
    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
      $firstBytes = \fread($handle, 512);
      \fclose($handle);
    }

    // Nothing read => not binary; otherwise run the strict binary check.
    return $firstBytes === '' ? false : self::is_binary($firstBytes, true);
  }
3112
3113
  /**
3114
   * Returns true if the string contains only whitespace chars, false otherwise.
3115
   *
3116
   * @param string $str
3117
   *
3118
   * @return bool
3119
   *               Whether or not $str contains only whitespace characters.
3120
   */
3121 15
  public static function is_blank(string $str): bool
  {
    // Anchored pattern built from the POSIX "space" class; the "*" quantifier
    // means the pattern itself also accepts zero characters.
    $onlySpacePattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $onlySpacePattern);
  }
3125
3126
  /**
3127
   * Checks if the given string is equal to any "Byte Order Mark".
3128
   *
3129
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3130
   *
3131
   * @param string $str <p>The input string.</p>
3132
   *
3133
   * @return bool
3134
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
3135
   */
3136 2
  public static function is_bom($str): bool
  {
    // self::$BOM is keyed by the BOM byte strings (the values are the byte
    // lengths and are not needed here). The input has to be identical to
    // one of those keys; a string that merely starts with a BOM is no match.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3146
3147
  /**
3148
   * Determine whether the string is considered to be empty.
3149
   *
3150
   * A variable is considered empty if it does not exist or if its value equals FALSE.
3151
   * empty() does not generate a warning if the variable does not exist.
3152
   *
3153
   * @param mixed $str
3154
   *
3155
   * @return bool Whether or not $str is empty().
3156
   */
3157
  public static function is_empty($str): bool
  {
    // For a variable that is known to exist (as a parameter always is),
    // empty() is the same as a negated boolean cast.
    return !$str;
  }
3161
3162
  /**
3163
   * Returns true if the string contains only hexadecimal chars, false otherwise.
3164
   *
3165
   * @param string $str
3166
   *
3167
   * @return bool
3168
   *               Whether or not $str contains only hexadecimal chars.
3169
   */
3170 13
  public static function is_hexadecimal(string $str): bool
  {
    // Anchored pattern built from the POSIX "xdigit" class; the "*" quantifier
    // means the pattern itself also accepts zero characters.
    $onlyHexPattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $onlyHexPattern);
  }
3174
3175
  /**
3176
   * Check if the string contains any html-tags <lall>.
3177
   *
3178
   * @param string $str <p>The input string.</p>
3179
   *
3180
   * @return bool
3181
   */
3182 3
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Matches a single opening, closing or self-closing tag, optionally with
    // attributes (bare, unquoted, single- or double-quoted values).
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // One hit anywhere in the string is enough.
    return \preg_match($tagPattern, $str) === 1;
  }
3195
3196
  /**
3197
   * Try to check if "$str" is a JSON string.
3198
   *
3199
   * @param string $str <p>The input string.</p>
3200
   *
3201
   * @return bool
3202
   */
3203 22
  public static function is_json(string $str): bool
  {
    // The empty string is never reported as JSON.
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; any other decode result
    // (scalars, null) is reported as "not JSON" by this method.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3228
3229
  /**
3230
   * @param string $str
3231
   *
3232
   * @return bool
3233
   */
3234 8
  public static function is_lowercase(string $str): bool
  {
    // Anchored pattern built from the POSIX "lower" class; the "*" quantifier
    // means the pattern itself also accepts zero characters.
    $onlyLowerPattern = '^[[:lower:]]*$';

    return self::str_matches_pattern($str, $onlyLowerPattern) ? true : false;
  }
3242
3243
  /**
3244
   * Returns true if the string is serialized, false otherwise.
3245
   *
3246
   * @param string $str
3247
   *
3248
   * @return bool Whether or not $str is serialized.
3249
   */
3250 7
  public static function is_serialized(string $str): bool
  {
    // The empty string is never reported as serialized data.
    if ($str === '') {
      return false;
    }

    // serialize(false) yields exactly "b:0;". It has to be recognised up
    // front because unserialize() returns false for it, which could not be
    // told apart from a failure below.
    if ($str === 'b:0;') {
      return true;
    }

    // Probe by actually unserializing. "allowed_classes => false" stops
    // objects of arbitrary classes from being instantiated just to answer a
    // yes/no question (they become __PHP_Incomplete_Class instead, which is
    // still "!== false", so the result is unchanged).
    //
    // NOTE(review): unserialize() on untrusted input remains risky even with
    // this restriction -- callers should not rely on this check as a
    // security boundary.
    //
    // The silence operator hides the notice/warning PHP emits for input
    // that is not serialized data.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3262
3263
  /**
3264
   * Returns true if the string contains only upper case chars, false
3265
   * otherwise.
3266
   *
3267
   * @param string $str <p>The input string.</p>
3268
   *
3269
   * @return bool
3270
   *               Whether or not $str contains only upper case characters.
3271
   */
3272 8
  public static function is_uppercase(string $str): bool
  {
    // Anchored pattern built from the POSIX "upper" class; the "*" quantifier
    // means the pattern itself also accepts zero characters.
    $onlyUpperPattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $onlyUpperPattern);
  }
3276
3277
  /**
3278
   * Check if the string is UTF-16.
3279
   *
3280
   * @param mixed $str <p>The input string.</p>
3281
   *
3282
   * @return int|false
3283
   *                   <strong>false</strong> if it's not UTF-16,<br>
3284
   *                   <strong>1</strong> for UTF-16LE,<br>
3285
   *                   <strong>2</strong> for UTF-16BE.
3286
   */
3287 21
  public static function is_utf16($str)
  {
    $str = (string)$str;

    // Only input that self::is_binary() reports as binary is examined.
    if (self::is_binary($str) === false) {
      return false;
    }

    // Make sure the feature table is populated before it is read (the same
    // guard the other methods of this class use); self::is_binary() can
    // return early without having done so.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Result of self::count_chars($str, true); filled on first use and then
    // shared by both probes below.
    $strChars = [];

    // Scores one byte order: the input is converted to UTF-8, back to the
    // candidate encoding and to UTF-8 again. Only if that round trip is
    // stable, every key of count_chars(<UTF-8 result>) that in_array() finds
    // (strictly) in $strChars adds one point.
    $scoreEncoding = static function (string $encoding) use ($str, &$strChars): int {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return $score;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($asUtf8Again !== $asUtf8) {
        return $score;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score++;
        }
      }

      return $score;
    };

    $maybeUTF16LE = $scoreEncoding('UTF-16LE');
    $maybeUTF16BE = $scoreEncoding('UTF-16BE');

    // A tie (including 0 : 0) means "cannot tell" => not UTF-16.
    if ($maybeUTF16BE === $maybeUTF16LE) {
      return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE
    return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
  }
3349
3350
  /**
3351
   * Check if the string is UTF-32.
3352
   *
3353
   * @param mixed $str
3354
   *
3355
   * @return int|false
3356
   *                   <strong>false</strong> if it's not UTF-32,<br>
3357
   *                   <strong>1</strong> for UTF-32LE,<br>
3358
   *                   <strong>2</strong> for UTF-32BE.
3359
   */
3360 17
  public static function is_utf32($str)
  {
    $str = (string)$str;

    // Only input that self::is_binary() reports as binary is examined.
    if (self::is_binary($str) === false) {
      return false;
    }

    // Make sure the feature table is populated before it is read (the same
    // guard the other methods of this class use); self::is_binary() can
    // return early without having done so.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Result of self::count_chars($str, true); filled on first use and then
    // shared by both probes below.
    $strChars = [];

    // Scores one byte order: the input is converted to UTF-8, back to the
    // candidate encoding and to UTF-8 again. Only if that round trip is
    // stable, every key of count_chars(<UTF-8 result>) that in_array() finds
    // (strictly) in $strChars adds one point.
    $scoreEncoding = static function (string $encoding) use ($str, &$strChars): int {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return $score;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($asUtf8Again !== $asUtf8) {
        return $score;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score++;
        }
      }

      return $score;
    };

    $maybeUTF32LE = $scoreEncoding('UTF-32LE');
    $maybeUTF32BE = $scoreEncoding('UTF-32BE');

    // A tie (including 0 : 0) means "cannot tell" => not UTF-32.
    if ($maybeUTF32BE === $maybeUTF32LE) {
      return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE
    return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
  }
3422
3423
  /**
3424
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3425
   *
3426
   * @see    http://hsivonen.iki.fi/php-utf8/
3427
   *
3428
   * @param string|string[] $str    <p>The string to be checked.</p>
3429
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3430
   *
3431
   * @return bool
3432
   */
3433 107
  public static function is_utf8($str, bool $strict = false): bool
3434
  {
3435 107
    if (\is_array($str) === true) {
3436 2
      foreach ($str as $k => $v) {
3437 2
        if (false === self::is_utf8($v, $strict)) {
3438 2
          return false;
3439
        }
3440
      }
3441
3442
      return true;
3443
    }
3444
3445 107
    if ('' === $str) {
3446 12
      return true;
3447
    }
3448
3449 103
    if ($strict === true) {
3450 2
      if (self::is_utf16($str) !== false) {
3451 2
        return false;
3452
      }
3453
3454
      if (self::is_utf32($str) !== false) {
3455
        return false;
3456
      }
3457
    }
3458
3459 103
    if (self::pcre_utf8_support() !== true) {
3460
3461
      // If even just the first character can be matched, when the /u
3462
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3463
      // invalid, nothing at all will match, even if the string contains
3464
      // some valid sequences
3465
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3466
    }
3467
3468 103
    $mState = 0; // cached expected number of octets after the current octet
3469
    // until the beginning of the next UTF8 character sequence
3470 103
    $mUcs4 = 0; // cached Unicode character
3471 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3472
3473 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3474
      self::checkForSupport();
3475
    }
3476
3477 103
    if (self::$ORD === null) {
3478
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3479
    }
3480
3481 103
    $len = self::strlen_in_byte((string)$str);
3482
    /** @noinspection ForeachInvariantsInspection */
3483 103
    for ($i = 0; $i < $len; $i++) {
3484 103
      $in = self::$ORD[$str[$i]];
3485 103
      if ($mState === 0) {
3486
        // When mState is zero we expect either a US-ASCII character or a
3487
        // multi-octet sequence.
3488 103
        if (0 === (0x80 & $in)) {
3489
          // US-ASCII, pass straight through.
3490 98
          $mBytes = 1;
3491 84
        } elseif (0xC0 === (0xE0 & $in)) {
3492
          // First octet of 2 octet sequence.
3493 75
          $mUcs4 = $in;
3494 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3495 75
          $mState = 1;
3496 75
          $mBytes = 2;
3497 58
        } elseif (0xE0 === (0xF0 & $in)) {
3498
          // First octet of 3 octet sequence.
3499 41
          $mUcs4 = $in;
3500 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3501 41
          $mState = 2;
3502 41
          $mBytes = 3;
3503 30
        } elseif (0xF0 === (0xF8 & $in)) {
3504
          // First octet of 4 octet sequence.
3505 19
          $mUcs4 = $in;
3506 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3507 19
          $mState = 3;
3508 19
          $mBytes = 4;
3509 13
        } elseif (0xF8 === (0xFC & $in)) {
3510
          /* First octet of 5 octet sequence.
3511
          *
3512
          * This is illegal because the encoded codepoint must be either
3513
          * (a) not the shortest form or
3514
          * (b) outside the Unicode range of 0-0x10FFFF.
3515
          * Rather than trying to resynchronize, we will carry on until the end
3516
          * of the sequence and let the later error handling code catch it.
3517
          */
3518 5
          $mUcs4 = $in;
3519 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3520 5
          $mState = 4;
3521 5
          $mBytes = 5;
3522 10
        } elseif (0xFC === (0xFE & $in)) {
3523
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3524 5
          $mUcs4 = $in;
3525 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3526 5
          $mState = 5;
3527 5
          $mBytes = 6;
3528
        } else {
3529
          // Current octet is neither in the US-ASCII range nor a legal first
3530
          // octet of a multi-octet sequence.
3531 103
          return false;
3532
        }
3533
      } else {
3534
        // When mState is non-zero, we expect a continuation of the multi-octet
3535
        // sequence
3536 84
        if (0x80 === (0xC0 & $in)) {
3537
          // Legal continuation.
3538 76
          $shift = ($mState - 1) * 6;
3539 76
          $tmp = $in;
3540 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3541 76
          $mUcs4 |= $tmp;
3542
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3543
          // Unicode code point to be output.
3544 76
          if (0 === --$mState) {
3545
            // Check for illegal sequences and code points.
3546
            //
3547
            // From Unicode 3.1, non-shortest form is illegal
3548
            if (
3549 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3550
                ||
3551 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3552
                ||
3553 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3554
                ||
3555 76
                (4 < $mBytes)
3556
                ||
3557
                // From Unicode 3.2, surrogate characters are illegal.
3558 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3559
                ||
3560
                // Code points outside the Unicode range are illegal.
3561 76
                ($mUcs4 > 0x10FFFF)
3562
            ) {
3563 8
              return false;
3564
            }
3565
            // initialize UTF8 cache
3566 76
            $mState = 0;
3567 76
            $mUcs4 = 0;
3568 76
            $mBytes = 1;
3569
          }
3570
        } else {
3571
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3572
          // Incomplete multi-octet sequence.
3573 36
          return false;
3574
        }
3575
      }
3576
    }
3577
3578 67
    return true;
3579
  }
3580
3581
  /**
   * Decodes a JSON string after running it through "UTF8::filter()".
   *
   * This is a thin wrapper around the native "json_decode()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The JSON text to decode; it is passed through "UTF8::filter()" first.</p>
   * @param bool   $assoc   [optional] <p>Forwarded to "json_decode()": when <b>TRUE</b>, objects are returned as
   *                        associative arrays.</p>
   * @param int    $depth   [optional] <p>Forwarded to "json_decode()": user specified recursion depth.</p>
   * @param int    $options [optional] <p>Forwarded to "json_decode()": bitmask of JSON decode options.</p>
   *
   * @throws \RuntimeException if the support table reports that "ext-json" is not available
   *
   * @return mixed
   *               The value produced by the native "json_decode()" for the filtered input.
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // Lazily fill the support table on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (false === self::$SUPPORT['json']) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3633
3634
  /**
   * Returns the JSON representation of a value after running it through "UTF8::filter()".
   *
   * This is a thin wrapper around the native "json_encode()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value to encode; it is passed through "UTF8::filter()" first.</p>
   * @param int   $options [optional] <p>Forwarded to "json_encode()": bitmask of JSON encode options
   *                       (e.g. <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>).</p>
   * @param int   $depth   [optional] <p>Forwarded to "json_encode()": the maximum depth.</p>
   *
   * @throws \RuntimeException if the support table reports that "ext-json" is not available
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filteredValue = self::filter($value);

    // Lazily fill the support table on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (false === self::$SUPPORT['json']) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3689
3690
  /**
   * Reports whether the native JSON functions can be used.
   *
   * The check is based on the existence of the "json_decode()" function.
   *
   * @return bool
   *              <strong>true</strong> if "json_decode()" exists, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3700
3701
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Charset forwarded to "UTF8::substr()" and "UTF8::strtolower()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to "UTF8::substr()" and "UTF8::strtolower()"
   *                          (remove non UTF-8 chars from the string).</p>
   *
   * @return string The lower-cased first character followed by the remainder of the string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; substr() may report failure with false.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = (false === $tail) ? '' : $tail;

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3725
3726
  /**
   * Alias of "UTF8::lcfirst()": lower-cases the first character of the string.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Forwarded unchanged to "UTF8::lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded unchanged to "UTF8::lcfirst()".</p>
   *
   * @return string The result of "UTF8::lcfirst()".
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3741
3742
  /**
   * Lower-cases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each non-empty chunk that is not
   * listed in $exceptions is passed through "UTF8::lcfirst()", and the chunks are
   * concatenated again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Forwarded to "UTF8::str_to_words()": additional characters that
   *                             belong to a word and do not start a new one.</p>
   * @param string   $encoding   [optional] <p>Charset forwarded to "UTF8::lcfirst()".</p>
   * @param bool     $cleanUtf8  [optional] <p>Forwarded to "UTF8::lcfirst()" (remove non UTF-8 chars).</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare with the empty string explicitly: "0" is falsy in PHP but is valid input.
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = [];

    foreach ($words as $word) {
      // Skip only chunks that are really empty; a "0" chunk must be kept.
      if ('' === (string)$word) {
        continue;
      }

      // With an empty exception list in_array() is always false,
      // so every word gets its first character lower-cased.
      if (!\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3791
3792
  /**
   * Alias of "UTF8::lcfirst()": lower-cases the first character of the string.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Forwarded unchanged to "UTF8::lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded unchanged to "UTF8::lcfirst()".</p>
   *
   * @return string The result of "UTF8::lcfirst()".
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3807
3808
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without $chars (or with an empty value) the leading run of characters from the
   * Unicode categories "Z" (separators) and "C" (other / control) is removed.
   * Otherwise the leading run of the given characters is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped; "0" is accepted as a regular character.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // An integer zero from non "strict_types" callers means the character "0".
    if ($chars === 0) {
      $chars = '0';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but is a legitimate character to strip, so it must not
    // select the default pattern.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = "^[\pZ\pC]+";
    } else {
      // Quote the characters so they are taken literally inside the character class.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3832
3833
  /**
   * Returns the UTF-8 character with the highest code point in the given data.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings
   *                                  (an array is concatenated before it is inspected).</p>
   *
   * @return string|null The character with the highest code point, or null if no code points were found.
   */
  public static function max($arg)
  {
    $subject = \is_array($arg) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($subject, false);
    if (0 === \count($codepoints)) {
      return null;
    }

    return self::chr(\max($codepoints));
  }
3855
3856
  /**
   * Returns the largest entry of "UTF8::chr_size_list()" for the given string,
   * i.e. the maximum number of bytes taken by any single character.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int The largest byte length of a character, or 0 if the size list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3873
3874
  /**
   * Checks whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding
   * is switched to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    $isLoaded = \extension_loaded('mbstring');

    if ($isLoaded) {
      \mb_internal_encoding('UTF-8');
    }

    return $isLoaded;
  }
3890
3891
  /**
   * Checks whether the mbstring function overloading of string functions is active.
   *
   * The INI directive 'mbstring.func_overload' is deprecated since PHP 7.2.
   *
   * @return bool false if the constant "MB_OVERLOAD_STRING" is not defined, otherwise whether
   *              that bit is set in the 'mbstring.func_overload' INI value
   */
  private static function mbstring_overloaded(): bool
  {
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $overloadBits = @\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING;

    return (bool)$overloadBits;
  }
3908
3909
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings
   *                   (an array is concatenated before it is inspected).</strong>
   *
   * @return string|null The character with the lowest code point, or null if no code points were found.
   */
  public static function min($arg)
  {
    $subject = \is_array($arg) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($subject, false);
    if (0 === \count($codepoints)) {
      return null;
    }

    return self::chr(\min($codepoints));
  }
3931
3932
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>Forwarded unchanged to "UTF8::normalize_encoding()".</p>
   * @param mixed $fallback <p>Forwarded unchanged to "UTF8::normalize_encoding()".</p>
   *
   * @return mixed The result of "UTF8::normalize_encoding()".
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3948
3949
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order:
   * 1. empty input, "0" or "1" returns $fallback
   * 2. exact matches for UTF-8, 8BIT / BINARY and HTML / HTML-ENTITIES
   * 3. a per-request static cache of earlier results
   * 4. an exact match in the list of known encodings
   * 5. the alias table below, looked up with the upper-cased name stripped of
   *    everything except letters, digits and whitespace
   *
   * If no alias matches, the upper-cased input is returned.
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (
        '8BIT' === $encoding
        ||
        'BINARY' === $encoding
    ) {
      return 'CP850';
    }

    if (
        'HTML' === $encoding
        ||
        'HTML-ENTITIES' === $encoding
    ) {
      return 'HTML-ENTITIES';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // getData() can return false; keep the property an array in that case
      // so that in_array() below never receives a non-array haystack.
      $loadedEncodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($loadedEncodings) ? $loadedEncodings : [];
    }

    $knownEncodings = \is_array(self::$ENCODINGS) ? self::$ENCODINGS : [];

    if (\in_array($encoding, $knownEncodings, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // preg_replace() returns null on error; cast so the lookup key is always a string.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the caller's original spelling so the next lookup short-circuits.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
4090
4091
  /**
   * Converts Windows ("\r\n") and old Mac ("\r") line endings to unix-like "\n".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with every "\r\n" and every remaining "\r" replaced by "\n".
   */
  public static function normalize_line_ending(string $str): string
  {
    // strtr() tries the longest key first, so "\r\n" collapses into one "\n"
    // before a lone "\r" is considered.
    $lineEndingMap = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return \strtr($str, $lineEndingMap);
  }
4102
4103
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement map is loaded once via "getData('utf8_msword')"; its keys are
   * replaced by its values with "str_replace()". The split key / value lists are
   * cached in static variables for the rest of the request.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // getData() can return false; keep the property an array in that case
        // so that array_keys() / array_values() never receive a non-array.
        $loadedMap = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($loadedMap) ? $loadedMap : [];
      }

      $replacementMap = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacementMap);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacementMap);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4131
4132
  /**
   * Normalize the whitespace.
   *
   * Every entry of the internal whitespace table is replaced by a plain space and, unless
   * requested otherwise, every entry of the bidirectional-control table is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ('' === $str) {
      return '';
    }

    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // one cached search list per value of $keepNonBreakingSpace (0 or 1)
    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4174
4175
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    // init
    $chr = (string)$chr;

    static $CHAR_CACHE = [];

    $needsEncoding = false;
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      $needsEncoding = ($encoding !== 'UTF-8');
    }

    // the key is built from the original input plus the normalized encoding,
    // so the cache can be consulted before the (more expensive) re-encoding
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($needsEncoding === true) {
      $chr = self::encode($encoding, $chr);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CHAR_CACHE[$cacheKey] = $code;
      }
    }

    // manual decoding: look at (at most) the first four bytes;
    // unpack('C*') yields a 1-based array of byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $code && isset($bytes[4])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    }

    // 3-byte sequence
    if (0xE0 <= $code && isset($bytes[3])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    }

    // 2-byte sequence
    if (0xC0 <= $code && isset($bytes[2])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xC0) << 6) + $bytes[2] - 0x80);
    }

    // single byte (or nothing to decode at all => 0)
    return $CHAR_CACHE[$cacheKey] = $code;
  }
4244
4245
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope
   *          if the second parameter is not set!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] !== true) {
      // no mbstring: use the native parser, success is judged by the filled $result only
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4281
4282
  /**
   * Checks if the "/u" modifier is available that enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern with the "u" modifier matches the empty string (=> 1);
    // the call is silenced because it fails with a warning when "u" is unsupported
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return $matched === 1;
  }
4294
4295
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits => code point,
   * hexadecimal digits => code point (via UTF8::hex_to_int()), anything else => UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] Empty if a boundary is empty or resolves to code point 0.
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // resolves one boundary to its code point; the value is cast to a string once,
    // because the ctype functions only accept text when strict_types is enabled
    $toCodePoint = static function ($value): int {
      $valueAsString = (string)$value;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($valueAsString)) {
        return (int)$value;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($valueAsString)) {
        return (int)self::hex_to_int($valueAsString);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4351
4352
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities,
    // which the entity decoding below then resolves
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapeRx, $str)) {
      $str = (string)\preg_replace($unicodeEscapeRx, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // repeat the decoding round until the string is stable (or only once, if requested)
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4400
4401
  /**
   * Filter a list of strings; the result is re-indexed.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Set to true, to drop values that are empty after "trim()".</p>
   * @param int   $removeShortValues [optional] <p>Drop values with a length lower than or equal to
   *                                 this number; null disables the check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];
    $checkLength = ($removeShortValues !== null);

    foreach ($strings as $value) {
      $isTooShort = $checkLength && self::strlen($value) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && \trim($value) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $value;
    }

    return $kept;
  }
4435
4436
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always compiled with the "u" modifier in front of the given options.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used; "msr" is treated as "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback for an empty (falsy) delimiter
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4466
4467
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4482
4483
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the internal BOM table (BOM => byte length) is checked in turn and
   * stripped when the string starts with it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bom => $bomLength) {
      if (self::strpos_in_byte($str, $bom, 0) !== 0) {
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomLength, $remainingBytes);
      if ($stripped === false) {
        return '';
      }

      $remainingBytes -= $bomLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4512
4513
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * A run of directly repeated $what (e.g. "   ") is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) === true ? [$what] : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($needles) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $repeatedNeedleRx = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = (string)\preg_replace($repeatedNeedleRx, $needle, $str);
    }

    return $str;
  }
4536
4537
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
   *                              not be stripped. Default: '' (strip every tag)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4551
4552
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Inserted once per removed break. Default is an empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" is listed first so that a CRLF counts as ONE break (one replacement);
    // the former pattern started with a stray "/" ("#/\r\n|..."), which made that
    // alternative match only "/\r\n" and thereby also swallowed the slash
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4564
4565
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" ...) of these chars.</p>
   * @param string $replacement [optional] <p>The replacement for each removed run of chars.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits in percent-encoding may be upper- or lowercase ("%0B" / "%0b"),
      // therefore both patterns must be case-insensitive
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left, because a removal can join the
    // surrounding characters into a new match (e.g. "%%0000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4598
4599
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // static analysis reports that strlen() can also return false,
    // while substr() expects an integer offset
    $prefixLength = (int)self::strlen($substring, $encoding);

    return (string)self::substr(
        $str,
        $prefixLength,
        null,
        $encoding
    );
  }
4622
4623
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // both lengths are measured in $encoding ...
    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // ... so the cut has to use the same encoding (it was not forwarded before,
    // unlike in "UTF8::remove_left()")
    return (string)self::substr(
        $str,
        0,
        $keepLength,
        $encoding
    );
  }
4645
4646
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to "UTF8::str_replace()" or, for the case-insensitive variant, to "UTF8::str_ireplace()".
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4664
4665
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * Delegates to "UTF8::str_replace()" or, for the case-insensitive variant, to "UTF8::str_ireplace()".
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4683
4684
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
      // never an arbitrary replacement string: invalid sequences are therefore either
      // dropped ("none") or turned into U+FFFD, which the final str_replace() below
      // converts into $replacementChar
      $substitute = ($replacementChar === '') ? 'none' : 0xFFFD;

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // restore the global setting even if the conversion throws
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4740
4741
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped; by default (INF or an empty value)
   *                      separator and "other" chars (\pZ, \pC) are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a legitimate character to strip, so it must not be
    // mistaken for "no chars given" by the empty-check
    $isZero = ($chars === '0' || $chars === 0);
    $useDefault = ($chars === INF) || (!$isZero && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefault) {
      $pattern = "[\pZ\pC]+\$";
    } else {
      // preg_quote() only accepts strings when strict_types is enabled
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4765
4766
  /**
   * Build (and cache) a regex fragment that matches any of the characters of $s.
   *
   * Characters that fit into a bracket expression are collected in "[...]" (prefixed by $class);
   * every other chunk returned by "UTF8::str_split()" becomes its own alternative in "(?:...|...)".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // $parts[0] collects the bracket expression, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a dash goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes: quoted before it is appended
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // a single character of three or more bytes: appended as it is
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === \count($parts))
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4814
4815
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes one "key - value" line per entry of the support table, wrapped in "<pre>".
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $key => $value) {
      $output .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4830
4831
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    $isKeptAscii = $keepAsciiChars === true && self::is_ascii($char) === true;
    if ($isKeptAscii) {
      return $char;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4860
4861
  /**
   * Replace each run of $tabLength spaces by one tab.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4871
4872
  /**
   * Convert a string to an array of Unicode characters.
   *
   * If an array is given, every element is split on its own (with the same options) and
   * the array of the results is returned.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // forward $cleanUtf8 as well, so that nested values
        // are treated exactly like a top-level string
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: decode the byte sequences by hand; a lead byte
      // that is not followed by valid continuation bytes is skipped

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 1 byte: 0xxxxxxx
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 2 bytes: 110xxxxx 10xxxxxx
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // group the single characters into chunks of $length characters
      $ret = \array_chunk($ret, $length);

      return \array_map(
          static function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
5004
5005
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // upper-cases the character that follows a run of separators; the separators are dropped
    $upperAfterSeparator = static function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };

    // upper-cases a run of digits together with the character that follows it
    $upperAfterDigits = static function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    $str = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are removed without capitalizing anything
    $str = (string)\preg_replace('/^[-_]+/', '', $str);
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);
    $str = (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);

    return $str;
  }
5042
5043
  /**
   * Capitalizes the first letter of each word of a personal name, except for
   * name parts which should stay as they are.
   *
   * Whitespace is collapsed first; the name is then processed twice, once split
   * on spaces and once split on dashes.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $normalized = self::collapse_whitespace($str);

    // first pass handles space-separated parts, second pass hyphenated parts
    foreach ([' ', '-'] as $separator) {
      $normalized = self::str_capitalize_name_helper($normalized, $separator);
    }

    return $normalized;
  }
5060
5061
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names on $delimiter and upper-cases the first character of every part, except for:
   * - parts that are exactly equal to one of the special-case names (e.g. "van", "de"),
   * - parts that begin with one of the special-case prefixes (e.g. "mc", "d'"),
   * - and, only when the delimiter is "-", parts that begin with one of the special-case names.
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator used to split and re-join the parts.</p>
   * @param string $encoding  [optional] <p>Passed to the position lookup. Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // init
    $namesArray = \explode($delimiter, $names);

    if ($namesArray === false) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // The set of beginnings that protect a part from being capitalized does not
    // depend on the current part, so it is built once instead of per iteration.
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
    }

    foreach ($namesArray as &$name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $isProtected = false;
      foreach ($protectedBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $isProtected = true;

          // one hit is enough, no need to test the remaining beginnings
          break;
        }
      }

      if ($isProtected) {
        continue;
      }

      $name = self::str_upper_first($name);
    }
    // break the reference so later writes to $name cannot touch the last element
    unset($name);

    return \implode($delimiter, $namesArray);
  }
5149
5150
  /**
   * Tells whether $haystack contains $needle.
   *
   * The comparison is case-sensitive by default and case-insensitive when
   * $caseSensitive is false. An empty haystack or needle always yields false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param string      $needle        <p>Substring to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   Backward-compatibility: a non-boolean value is used as the encoding
   *                                   (and its truthiness then selects the search mode).</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack) {
      return false;
    }

    if ('' === $needle) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5179
5180
  /**
   * Tells whether $haystack contains every one of the $needles.
   *
   * The comparison is case-sensitive by default and case-insensitive when
   * $caseSensitive is false. An empty haystack or an empty needle list yields false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param array       $needles       <p>SubStrings to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   Backward-compatibility: a non-boolean value is used as the encoding.</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || \count($needles) === 0) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        // a single miss decides the result
        return false;
      }
    }

    return true;
  }
5215
5216
  /**
   * Tells whether $haystack contains at least one of the $needles.
   *
   * The comparison is case-sensitive by default and case-insensitive when
   * $caseSensitive is false. An empty needle list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        return true;
      }
    }

    return false;
  }
5243
5244
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * Shortcut for str_delimit() with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5258
5259
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   *
   * A dash is inserted before every ASCII uppercase letter (A-Z) that is not at a
   * word boundary, the string is lower-cased, and every run of dashes, underscores
   * and whitespace is then replaced by $delimiter. The delimiter is inserted
   * literally: it is neither lower-cased nor interpreted as a regex replacement
   * pattern (so "$0" or "\1" inside the delimiter stay as they are).
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark the "camelCase" boundaries with a dash, they get unified below
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // A callback is used instead of a replacement string so that "$n" / "\n"
    // sequences in the delimiter are not expanded as back-references.
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function (array $match) use ($delimiter) {
          return $delimiter;
        },
        $str
    );
  }
5281
5282
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order:
   * 1. binary input: only UTF-16 / UTF-32 (LE/BE) are reported, anything else is "false"
   * 2. ASCII
   * 3. UTF-8
   * 4. "mb_detect_encoding()" with a fixed candidate list (only if mbstring is available)
   * 5. "iconv()" round-trip against the encodings from the data file
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // each check is evaluated once and its result reused for the LE / BE decision
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // the data lookup may yield a non-array (e.g. false); fall back to an
      // empty list so the property keeps its declared type and the loop is safe
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5408
5409
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is byte-wise and case-sensitive. An empty haystack or an
   * empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack) {
      return false;
    }

    if ('' === $needle) {
      return false;
    }

    // take as many bytes from the end as the needle is long and compare them
    $needleBytes = \strlen($needle);
    $tail = \substr($haystack, -$needleBytes);

    return $tail === $needle;
  }
5425
5426
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   * - an empty list of substrings yields false
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5450
5451
  /**
   * Makes sure the string begins with $substring; the substring is prepended
   * when str_starts_with() reports that it is missing.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring)
        ? $str
        : $substring . $str;
  }
5468
5469
  /**
   * Makes sure the string ends with $substring; the substring is appended
   * when str_ends_with() reports that it is missing.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring)
        ? $str
        : $str . $substring;
  }
5485
5486
  /**
   * Makes an identifier-like string readable: every "_id" is removed, remaining
   * underscores become spaces, the result is trimmed and its first character
   * is upper-cased.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // search => replacement, applied in this order
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $humanized = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    return self::ucfirst(self::trim($humanized));
  }
5510
5511
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of the haystack is cut off by the byte length of the needle and
   * then compared via strcasecmp(). An empty haystack or needle yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    return self::strcasecmp(\substr($haystack, -\strlen($needle)), $needle) === 0;
  }
5531
5532
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   * - an empty list of substrings yields false
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5556
5557
  /**
   * Case-insensitive lookup of the first occurrence of $needle in the string.
   *
   * Thin wrapper that forwards all arguments to stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::stripos($str, $needle, $offset, $encoding);

    return $position;
  }
5579
5580
  /**
   * Case-insensitive lookup of the last occurrence of $needle in the string.
   *
   * Thin wrapper that forwards all arguments to strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::strripos($str, $needle, $offset, $encoding);

    return $position;
  }
5603
5604
  /**
   * Case-sensitive lookup of the first occurrence of $needle in the string.
   *
   * Thin wrapper that forwards all arguments to strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::strpos($str, $needle, $offset, $encoding);

    return $position;
  }
5626
5627
  /**
   * Case-sensitive lookup of the last occurrence of $needle in the string.
   *
   * Thin wrapper that forwards all arguments to strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::strrpos($str, $needle, $offset, $encoding);

    return $position;
  }
5650
5651
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged when $index lies beyond its length or
   * when its length cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // strlen() may report "false"; that value must not be handed on as the
    // integer length argument of substr() (strict types are enabled)
    if ($len === false) {
      return $str;
    }

    if ($index > $len) {
      return $str;
    }

    // substr() may report "false" as well, the casts turn that into an empty string
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5674
5675
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is quoted and turned into a case-insensitive unicode regex;
   * an empty search term never matches. Replacements are inserted literally, i.e.
   * "$1" or "\1" inside a replacement are NOT treated as regex back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string or an array of replacement strings.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements; the unchanged subject if the regex engine reports an error.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      $patterns[] = ('' === $needle)
          // "start of subject preceded by a character" can never match
          ? '/^(?<=.)$/'
          : '/' . \preg_quote($needle, '/') . '/ui';
    }

    // Escape "\" and "$" so that preg_replace() inserts the replacement verbatim
    // instead of expanding sequences like "$10" or "\1" as back-references.
    $quoteReplacement = static function ($value): string {
      return \addcslashes((string)$value, '\\$');
    };

    $replacement = \is_array($replace)
        ? \array_map($quoteReplacement, $replace)
        : $quoteReplacement($replace);

    $result = \preg_replace($patterns, $replacement, $subject, -1, $count);

    // "null" signals a regex error (e.g. invalid UTF-8); hand back the input
    // instead of silently losing it
    if ($result === null) {
      return $subject;
    }

    return $result;
  }
5718
5719
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::stripos($haystack, $needle) === 0;
  }
5739
5740
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   * - an empty string or an empty list of substrings yields false
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ('' === $str || \count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5768
5769
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when the input or the separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // the lookup has to use the same encoding as the length / substring calls below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5800
5801
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when the input or the separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // the lookup has to use the same encoding as the length / substring calls below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5832
5833
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when the input or the separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // the lookup has to use the same encoding as the substring call below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5859
5860
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * when the input or the separator is empty, or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // the lookup has to use the same encoding as the substring call below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5886
5887
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to stristr(); an empty string is returned when the input or the
   * needle is empty, or when stristr() reports "false".
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $needle || '' === $str) {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return (false === $part) ? '' : $part;
  }
5919
5920
  /**
5921
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
5922
   *
5923
   * @param string $str          <p>The input string.</p>
5924
   * @param string $needle       <p>The string to look for.</p>
5925
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5926
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
5927
   *
5928
   * @return string
5929
   */
5930 1
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Empty operands can never produce a match.
    if ($str === '' || $needle === '') {
      return '';
    }

    // strrichr() signals "needle not found" with false; callers of this helper get '' instead.
    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5947
5948
  /**
5949
   * Returns the last $n characters of the string.
5950
   *
5951
   * @param string $str      <p>The input string.</p>
5952
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
5953
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5954
   *
5955
   * @return string
5956
   */
5957 12
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Asking for zero or a negative number of characters yields nothing.
    if ($n < 1) {
      return '';
    }

    // A negative start offset makes substr() count from the end of the string.
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5967
5968
  /**
5969
   * Limit the number of characters in a string.
5970
   *
5971
   * @param string $str      <p>The input string.</p>
5972
   * @param int    $length   [optional] <p>Default: 100</p>
5973
   * @param string $strAddOn [optional] <p>Default: …</p>
5974
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5975
   *
5976
   * @return string
5977
   */
5978 2
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    // An empty input or a non-positive limit always yields an empty string.
    if ('' === $str || $length <= 0) {
      return '';
    }

    // Already short enough: return the input untouched (no add-on is appended).
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The add-on consumes part of the $length budget. Measure it in the same
    // encoding as the input so both lengths are counted in the same unit.
    $keep = $length - (int)self::strlen($strAddOn, $encoding);

    // substr() may return false; cast so the concatenation always operates on strings.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5994
5995
  /**
5996
   * Limit the number of characters in a string, but also after the next word.
5997
   *
5998
   * @param string $str      <p>The input string.</p>
5999
   * @param int    $length   [optional] <p>Default: 100</p>
6000
   * @param string $strAddOn [optional] <p>Default: …</p>
6001
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6002
   *
6003
   * @return string
6004
   */
6005 6
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    // An empty input or a non-positive limit always yields an empty string.
    if ('' === $str || $length <= 0) {
      return '';
    }

    // Already short enough: return the input untouched (no add-on is appended).
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // If the character at the limit is a space, the cut falls exactly on a word boundary.
    $lastIndex = $length - 1;
    if (self::substr($str, $lastIndex, 1, $encoding) === ' ') {
      return self::substr($str, 0, $lastIndex, $encoding) . $strAddOn;
    }

    // Otherwise cut at the limit and drop the trailing (possibly partial) word.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    $wholeWords = \implode(' ', \array_slice($words, 0, -1));

    if ($wholeWords !== '') {
      return $wholeWords . $strAddOn;
    }

    // No complete word fits: fall back to a hard cut one character before the limit.
    return self::substr($truncated, 0, $lastIndex, $encoding) . $strAddOn;
  }
6036
6037
  /**
6038
   * Returns the longest common prefix between the string and $otherStr.
6039
   *
6040
   * @param string $str      <p>The input string.</p>
6041
   * @param string $otherStr <p>Second string for comparison.</p>
6042
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6043
   *
6044
   * @return string
6045
   */
6046 10
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The common prefix can never be longer than the shorter of the two strings.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = self::substr($str, $i, 1, $encoding);

      // Compare strictly: with "==" a failed extraction (false) would be
      // considered equal to the character "0". Stop at the first position that
      // cannot be read or that differs.
      if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
6063
6064
  /**
6065
   * Returns the longest common substring between the string and $otherStr.
6066
   * In the case of ties, it returns that which occurs first.
6067
   *
6068
   * @param string $str
6069
   * @param string $otherStr <p>Second string for comparison.</p>
6070
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6071
   *
6072
   * @return string The longest common substring of $str and $otherStr.
6073
   */
6074 11
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract every character of $otherStr once up front instead of once per
    // table cell; index $j matches the (1-based) table column.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    // $len = length of the best match so far, $end = index in $str just past that match.
    $len = 0;
    $end = 0;

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1].
    // All cells start at 0, so only matching cells need to be written.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // The character of $str depends only on the row, so fetch it once per row.
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar !== $otherChars[$j]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // Strictly greater: on ties the earlier match is kept.
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6115
6116
  /**
6117
   * Returns the longest common suffix between the string and $otherStr.
6118
   *
6119
   * @param string $str
6120
   * @param string $otherStr <p>Second string for comparison.</p>
6121
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6122
   *
6123
   * @return string
6124
   */
6125 10
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The common suffix can never be longer than the shorter of the two strings.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // Negative offsets walk both strings backwards from their last character.
      $char = self::substr($str, -$i, 1, $encoding);

      // Compare strictly: with "==" a failed extraction (false) would be
      // considered equal to the character "0". Stop at the first position that
      // cannot be read or that differs.
      if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      // Characters are found back-to-front, so each one is prepended.
      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
6142
6143
  /**
6144
   * Returns true if $str matches the supplied pattern, false otherwise.
6145
   *
6146
   * @param string $str     <p>The input string.</p>
6147
   * @param string $pattern <p>Regex pattern to match against.</p>
6148
   *
6149
   * @return bool Whether or not $str matches the pattern.
6150
   */
6151 126
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // The pattern is wrapped in "/" delimiters and run in UTF-8 mode ("u" modifier).
    // preg_match() yields 1 on a match, 0 on no match and false on failure;
    // only a match maps to true.
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
6159
6160
  /**
6161
   * Returns whether or not a character exists at an index. Offsets may be
6162
   * negative to count from the last character in the string. Implements
6163
   * part of the ArrayAccess interface.
6164
   *
6165
   * @param string $str      <p>The input string.</p>
6166
   * @param int    $offset   <p>The index to check.</p>
6167
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6168
   *
6169
   *
6170
   * @return bool Whether or not the index exists.
6171
   */
6172 6
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);

    // Non-negative offsets are zero-based, so the last valid one is "count - 1";
    // negative offsets count back from the end, so "-count" is still valid.
    return $offset >= 0
        ? ($charCount > $offset)
        : ($charCount >= \abs($offset));
  }
6183
6184
  /**
6185
   * Returns the character at the given index. Offsets may be negative to
6186
   * count from the last character in the string. Implements part of the
6187
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
6188
   * does not exist.
6189
   *
6190
   * @param string $str      <p>The input string.</p>
6191
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6192
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6193
   *
6194
   * @return string The character at the specified index.
6195
   *
6196
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
6197
   */
6198 2
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Measure the string in the same encoding that char_at() uses below, so the
    // bounds check and the lookup agree on what a "character" is.
    $length = self::strlen($str, $encoding);

    // Non-negative indexes must be below the length; negative indexes may reach
    // back at most "length" characters from the end.
    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6213
6214
  /**
6215
   * Pad a UTF-8 string to given length with another string.
6216
   *
6217
   * @param string $str        <p>The input string.</p>
6218
   * @param int    $pad_length <p>The length of return string.</p>
6219
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
6220
   * @param int    $pad_type   [optional] <p>
6221
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
6222
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
6223
   *                           </p>
6224
   * @param string $encoding   [optional] <p>Default: UTF-8</p>
6225
   *
6226
   * @return string Returns the padded string.
6227
   */
6228 41
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    // NOTE: an empty input is returned as an empty string and is never padded.
    if ('' === $str) {
      return '';
    }

    // Besides the STR_PAD_* constants, the textual aliases 'left', 'right' and
    // 'both' are accepted; anything else that is not an integer is rejected.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string, $encoding);

      // An empty (or unmeasurable) pad string cannot fill anything and would
      // cause a division by zero in the repeat-count calculations below.
      if (!$ps_length) {
        return $str;
      }

      // Number of characters that have to be added in total.
      $diff = ($pad_length - $str_length);

      // Each branch repeats the pad string often enough to cover the gap and
      // then trims the result to the exact number of characters required.
      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff, $encoding);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // With an odd gap the extra character goes to the right-hand side.
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff, $encoding);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    // Requested length is not larger than the input: nothing to pad.
    return $str;
  }
6285
6286
  /**
6287
   * Returns a new string of a given length such that both sides of the
6288
   * string are padded. Alias for pad() with a $padType of 'both'.
6289
   *
6290
   * @param string $str
6291
   * @param int    $length   <p>Desired string length after padding.</p>
6292
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6293
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6294
   *
6295
   * @return string String with padding applied.
6296
   */
6297 11
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Total number of characters missing to reach the requested length.
    $missing = $length - self::strlen($str, $encoding);

    // Split the gap in two; with an odd gap the right side gets the extra character.
    $leftPadding = (int)\floor($missing / 2);
    $rightPadding = (int)\ceil($missing / 2);

    return self::apply_padding($str, $leftPadding, $rightPadding, $padStr, $encoding);
  }
6303
6304
  /**
6305
   * Returns a new string of a given length such that the beginning of the
6306
   * string is padded. Alias for pad() with a $padType of 'left'.
6307
   *
6308
   * @param string $str
6309
   * @param int    $length   <p>Desired string length after padding.</p>
6310
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6311
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6312
   *
6313
   * @return string String with left padding.
6314
   */
6315 7
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the caller's encoding (as str_pad_both() does) so the
    // amount of padding matches the encoding that apply_padding() works with.
    $missing = $length - self::strlen($str, $encoding);

    // All of the missing characters go to the left side, none to the right.
    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6319
6320
  /**
6321
   * Returns a new string of a given length such that the end of the string
6322
   * is padded. Alias for pad() with a $padType of 'right'.
6323
   *
6324
   * @param string $str
6325
   * @param int    $length   <p>Desired string length after padding.</p>
6326
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6327
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6328
   *
6329
   * @return string String with right padding.
6330
   */
6331 7
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the caller's encoding (as str_pad_both() does) so the
    // amount of padding matches the encoding that apply_padding() works with.
    $missing = $length - self::strlen($str, $encoding);

    // All of the missing characters go to the right side, none to the left.
    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6335
6336
  /**
6337
   * Repeat a string.
6338
   *
6339
   * @param string $str        <p>
6340
   *                           The string to be repeated.
6341
   *                           </p>
6342
   * @param int    $multiplier <p>
6343
   *                           Number of time the input string should be
6344
   *                           repeated.
6345
   *                           </p>
6346
   *                           <p>
6347
   *                           multiplier has to be greater than or equal to 0.
6348
   *                           If the multiplier is set to 0, the function
6349
   *                           will return an empty string.
6350
   *                           </p>
6351
   *
6352
   * @return string The repeated string.
6353
   */
6354 9
  public static function str_repeat(string $str, int $multiplier): string
  {
    // The input is passed through self::filter() first; the repetition itself is
    // done by the native str_repeat().
    return \str_repeat(self::filter($str), $multiplier);
  }
6360
6361
  /**
6362
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
6363
   *
6364
   * Replace all occurrences of the search string with the replacement string
6365
   *
6366
   * @link http://php.net/manual/en/function.str-replace.php
6367
   *
6368
   * @param mixed $search  <p>
6369
   *                       The value being searched for, otherwise known as the needle.
6370
   *                       An array may be used to designate multiple needles.
6371
   *                       </p>
6372
   * @param mixed $replace <p>
6373
   *                       The replacement value that replaces found search
6374
   *                       values. An array may be used to designate multiple replacements.
6375
   *                       </p>
6376
   * @param mixed $subject <p>
6377
   *                       The string or array being searched and replaced on,
6378
   *                       otherwise known as the haystack.
6379
   *                       </p>
6380
   *                       <p>
6381
   *                       If subject is an array, then the search and
6382
   *                       replace is performed with every entry of
6383
   *                       subject, and the return value is an array as
6384
   *                       well.
6385
   *                       </p>
6386
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
6387
   *
6388
   * @return mixed This function returns a string or an array with the replaced values.
6389
   */
6390 92
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Straight delegation to the native function; $count is filled by reference.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6394
6395
  /**
6396
   * Replaces all occurrences of $search from the beginning of string with $replacement.
6397
   *
6398
   * @param string $str         <p>The input string.</p>
6399
   * @param string $search      <p>The string to search for.</p>
6400
   * @param string $replacement <p>The replacement.</p>
6401
   *
6402
   * @return string String after the replacements.
6403
   */
6404 16
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // The search term is quoted so it is matched literally, anchored at the start.
    $pattern = '^' . \preg_quote($search, '/');

    // Every backslash in the replacement is doubled before it is handed on —
    // presumably so regex_replace() does not read it as an escape (TODO confirm).
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $pattern, $escapedReplacement);
  }
6412
6413
  /**
6414
   * Replaces all occurrences of $search from the ending of string with $replacement.
6415
   *
6416
   * @param string $str         <p>The input string.</p>
6417
   * @param string $search      <p>The string to search for.</p>
6418
   * @param string $replacement <p>The replacement.</p>
6419
   *
6420
   * @return string String after the replacements.
6421
   */
6422 16
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // The search term is quoted so it is matched literally, anchored at the end.
    $pattern = \preg_quote($search, '/') . '$';

    // Every backslash in the replacement is doubled before it is handed on —
    // presumably so regex_replace() does not read it as an escape (TODO confirm).
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $pattern, $escapedReplacement);
  }
6430
6431
  /**
6432
   * Replace the first "$search"-term with the "$replace"-term.
6433
   *
6434
   * @param string $search
6435
   * @param string $replace
6436
   * @param string $subject
6437
   *
6438
   * @return string
6439
   */
6440 2
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstPos = self::strpos($subject, $search);

    // Search term not present: the subject is returned unchanged.
    if ($firstPos === false) {
      return $subject;
    }

    // Swap exactly the matched span (start = position, length = length of $search).
    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
6449
6450
  /**
6451
   * Replace the last "$search"-term with the "$replace"-term.
6452
   *
6453
   * @param string $search
6454
   * @param string $replace
6455
   * @param string $subject
6456
   *
6457
   * @return string
6458
   */
6459 2
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $lastPos = self::strrpos($subject, $search);

    // Search term not present: the subject is returned unchanged.
    if ($lastPos === false) {
      return $subject;
    }

    // Swap exactly the matched span (start = position, length = length of $search).
    return self::substr_replace($subject, $replace, $lastPos, self::strlen($search));
  }
6468
6469
  /**
6470
   * Shuffles all the characters in the string.
6471
   *
6472
   * PS: uses random algorithm which is weak for cryptography purposes
6473
   *
6474
   * @param string $str <p>The input string</p>
6475
   *
6476
   * @return string The shuffled string.
6477
   */
6478 5
  public static function str_shuffle(string $str): string
  {
    $length = (int)self::strlen($str);

    // range(0, -1) would yield [0, -1] instead of an empty list, so a string
    // without characters is handled explicitly.
    if ($length === 0) {
      return '';
    }

    // Shuffle the character positions, then rebuild the string in that order.
    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6491
6492
  /**
6493
   * Returns the substring beginning at $start, and up to, but not including
6494
   * the index specified by $end. If $end is omitted, the function extracts
6495
   * the remaining string. If $end is negative, it is computed from the end
6496
   * of the string.
6497
   *
6498
   * @param string $str
6499
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
6500
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
6501
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6502
   *
6503
   * @return string|false
6504
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
6505
   *                     characters long, <b>FALSE</b> will be returned.
6506
   */
6507 18
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // Translate the ($start, $end) pair into the ($start, $length) pair that
    // substr() expects. Lengths are measured in the same encoding that is used
    // for the extraction, and cast to int because strlen() may return false.
    if ($end === null) {
      // No end given: take everything from $start onwards.
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // The end does not lie behind the start: empty slice.
      return '';
    } elseif ($end < 0) {
      // A negative end is counted from the end of the string.
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6521
6522
  /**
6523
   * Convert a string to e.g.: "snake_case"
6524
   *
6525
   * @param string $str
6526
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6527
   *
6528
   * @return string String in snake_case.
6529
   */
6530 20
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    // Step 1: normalize whitespace and treat dashes like underscores.
    $prepared = \str_replace('-', '_', self::normalize_whitespace($str));

    // Step 2: mark word boundaries. Every character matched by the class below
    // (digits, "A"-"Z" and — as written — a literal "|") gets a leading "_";
    // digits additionally get a trailing "_", everything else is lower-cased.
    $marked = (string)\preg_replace_callback(
        '/([\d|A-Z])/u',
        function ($matches) use ($encoding) {
          $token = $matches[1];

          // A token that survives the int round-trip is a digit.
          if ((string)(int)$token == $token) {
            return '_' . $token . '_';
          }

          return '_' . UTF8::strtolower($token, $encoding);
        },
        $prepared
    );

    // Step 3: apply the clean-up rules in order (pattern => replacement).
    $cleanupRules = [
        '/\s+/'       => '_', // convert spaces to "_"
        '/^\s+|\s+$/' => '',  // trim leading & trailing spaces
        '/_+/'        => '_', // remove double "_"
    ];
    $collapsed = (string)\preg_replace(
        \array_keys($cleanupRules),
        \array_values($cleanupRules),
        $marked
    );

    // Step 4: strip leading & trailing "_" first, then leading & trailing whitespace.
    $withoutUnderscores = self::trim($collapsed, '_');

    return self::trim($withoutUnderscores);
  }
6569
6570
  /**
6571
   * Sort all characters according to code points.
6572
   *
6573
   * @param string $str    <p>A UTF-8 string.</p>
6574
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
6575
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
6576
   *
6577
   * @return string String of sorted characters.
6578
   */
6579 2
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    // Flipping twice drops duplicates: values become keys (which are unique) and back.
    if ($unique) {
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    // Sort by value, ascending unless a descending order was requested.
    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    // Turn the ordered code points back into a string.
    return self::string($codePoints);
  }
6595
6596
  /**
6597
   * alias for "UTF8::split()"
6598
   *
6599
   * @see UTF8::split()
6600
   *
6601
   * @param string|string[] $str
6602
   * @param int             $len
6603
   *
6604
   * @return string[]
6605
   */
6606 25
  public static function str_split($str, int $len = 1): array
  {
    // Pure alias: all of the work is done by split().
    $chunks = self::split($str, $len);

    return $chunks;
  }
6610
6611
  /**
6612
   * Splits the string with the provided regular expression, returning an
6613
   * array of strings. An optional integer $limit will truncate the
6614
   * results.
6615
   *
6616
   * @param string $str
6617
   * @param string $pattern <p>The regex with which to split the string.</p>
6618
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
6619
   *
6620
   * @return string[] An array of strings.
6621
   */
6622 16
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    // A limit of zero means "no results at all".
    if (0 === $limit) {
      return [];
    }

    // this->split errors when supplied an empty pattern in < PHP 5.4.13
    // and current versions of HHVM (3.8 and below)
    if ('' === $pattern) {
      return [$str];
    }

    // preg_split() puts the remaining unsplit string into the last element when a
    // limit is given, so one extra piece is requested and removed again below.
    // Any non-positive limit is normalized to -1 (no limit).
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    // NOTE: the pattern is passed through preg_quote(), i.e. it is matched literally.
    $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
    if ($pieces === false) {
      return [];
    }

    // Exactly $splitLimit pieces means the last one is the unsplit remainder: drop it.
    if ($splitLimit > 0 && \count($pieces) === $splitLimit) {
      \array_pop($pieces);
    }

    return $pieces;
  }
6654
6655
  /**
6656
   * Check if the string starts with the given substring.
6657
   *
6658
   * @param string $haystack <p>The string to search in.</p>
6659
   * @param string $needle   <p>The substring to search for.</p>
6660
   *
6661
   * @return bool
6662
   */
6663 41
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // By convention of this helper, empty operands never count as a match.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // Byte-wise, case-sensitive comparison of the leading bytes with the needle.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6675
6676
  /**
6677
   * Returns true if the string begins with any of $substrings, false otherwise.
6678
   *
6679
   * - case-sensitive
6680
   *
6681
   * @param string $str        <p>The input string.</p>
6682
   * @param array  $substrings <p>Substrings to look for.</p>
6683
   *
6684
   * @return bool Whether or not $str starts with $substring.
6685
   */
6686 8
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing to test against, or nothing to test.
    if ('' === $str || \count($substrings) === 0) {
      return false;
    }

    // The first candidate that is a prefix of $str settles the question.
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6704
6705
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * NOTE(review): the separator position is looked up via str_index_first()
   * without forwarding $encoding, while the separator length is measured
   * with $encoding - confirm this is intended for non-default encodings.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if (false === $separatorPos) {
      return '';
    }

    // start right behind the separator and take everything up to the end
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6736
6737
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * NOTE(review): the separator position is looked up via str_index_last()
   * without forwarding $encoding, while the separator length is measured
   * with $encoding - confirm this is intended for non-default encodings.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if (false === $separatorPos) {
      return '';
    }

    // start right behind the separator and take everything up to the end
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6768
6769
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * NOTE(review): the separator position is looked up via str_index_first()
   * without forwarding $encoding, but is then used as a length together
   * with $encoding - confirm this is intended for non-default encodings.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if (false === $separatorPos) {
      return '';
    }

    // everything from the beginning up to (excluding) the separator
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6800
6801
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * An empty string is returned if $str or $separator is empty, or if the
   * separator does not occur in $str.
   *
   * NOTE(review): the separator position is looked up via str_index_last()
   * without forwarding $encoding, but is then used as a length together
   * with $encoding - confirm this is intended for non-default encodings.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if (false === $separatorPos) {
      return '';
    }

    // everything from the beginning up to (excluding) the separator
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6832
6833
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Thin wrapper around UTF8::strstr() that returns an empty string instead
   * of false, and also for an empty $str or an empty $needle.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6865
6866
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * Thin wrapper around UTF8::strrchr() that returns an empty string instead
   * of false, and also for an empty $str or an empty $needle.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6893
6894
  /**
   * Surrounds $str with the given substring.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6906
6907
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Every run of non-whitespace characters counts as a word. Words listed in
   * $ignore (strict, case-sensitive comparison) are returned untouched; all
   * other words are lower-cased and then get an upper-case first character.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // $encoding has to be used for both steps, otherwise the first
          // character is upper-cased with the default encoding
          return self::str_upper_first(
              self::strtolower($match[0], $encoding),
              $encoding
          );
        },
        $str
    );
  }
6936
6937
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * Processing steps:
   * 1. trim the input; lower-case it if it contains no lower-case character
   * 2. main pass: keep URLs / e-mails / file paths, lower-case "small words",
   *    capitalize words without internal caps, keep other words (e.g. iPhone)
   * 3. capitalize small words at the start of the title or of a subsentence /
   *    inserted subphrase
   * 4. capitalize small words at the end of the title or of an inserted subphrase
   * 5. capitalize small words inside hyphenated compounds ("in-flight", "stand-in")
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the
   *                         built-in list of small words and inserted into the regular expressions
   *                         as they are.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // an input without any lower-case character (e.g. all caps) is lower-cased first;
    // $encoding is forwarded so that this step matches the other case conversions below
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    //
    // NOTE(review): the lookbehind in this pattern is written against "…" (U+2026)
    // and not against "-", although its inline comment talks about a hyphen. Chains
    // like "man-in-the-middle" are rejected by the trailing negative lookahead
    // instead. The pattern is intentionally left untouched - confirm before changing.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7081
7082
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of $str are read as one big-endian number and written in
   * base 2 without leading zeros, e.g. "a" (0x61) => "1100001". An empty
   * string (or a string of NUL bytes only) yields "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    // Convert byte by byte: base_convert() on the whole hex dump works with
    // floats internally and silently loses precision for longer strings.
    $bits = '';
    $byteCount = \strlen($str);
    for ($i = 0; $i < $byteCount; ++$i) {
      $bits .= \sprintf('%08b', \ord($str[$i]));
    }

    // same output format as before: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7095
7096
  /**
   * Split a string into an array of lines.
   *
   * The separator is any run of one or two consecutive "\r" / "\n"
   * characters, so "\r\n" counts as one line break - and so does "\n\n",
   * which means a single blank line between two lines is swallowed.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for an empty input and for a failed split
    $emptyResult = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $emptyResult;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if (false === $lines) {
      return $emptyResult;
    }

    // nothing to filter
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
7131
7132
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the words as captured delimiters, so the
   * unfiltered result alternates between "non-word" parts (even indexes,
   * possibly empty) and words (odd indexes).
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if (false === $parts) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // nothing to filter
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    // make sure that every entry is a string, the keys are kept
    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
7176
7177
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7192
7193
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * If $substring is longer than a non-negative $length it can not fit and is
   * not appended; the string is then simply cut to $length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Need to further trim the string so we can append the substring
    $substringLength = (int)self::strlen($substring, $encoding);
    if ($length >= 0 && $substringLength > $length) {
      // The substring does not fit: subtracting its length would turn
      // $length negative, substr() would then cut from the end of the string
      // and the result could get longer than requested.
      $substring = '';
      $substringLength = 0;
    }
    $length -= $substringLength;

    // substr() may return false, the cast keeps the concatenation type-safe
    return (string)self::substr($str, 0, $length, $encoding) . $substring;
  }
7226
7227
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * If $substring is longer than a non-negative $length it can not fit and is
   * not appended.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // need to further trim the string so we can append the substring
    $substringLength = (int)self::strlen($substring, $encoding);
    if ($length >= 0 && $substringLength > $length) {
      // The substring does not fit: subtracting its length would turn
      // $length negative and substr() would then cut from the end of the string.
      $substring = '';
      $substringLength = 0;
    }
    $length -= $substringLength;

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // if the last word was truncated
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace != $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      if ($lastPos !== false || $strPosSpace !== false) {
        // substr() may return false, the cast keeps $truncated a string
        $truncated = (string)self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    return $truncated . $substring;
  }
7270
7271
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * Implemented as UTF8::str_delimit() with "_" as delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7285
7286
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * Implemented as UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7300
7301
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8);

    return $upperFirst;
  }
7316
7317
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The work is done on the result of UTF8::str_to_words(), where the words
   * are located at the odd indexes and the parts between them at the even
   * indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // plain list of words
      $words = [];
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      // words, keyed by their offset within the string
      $words = [];
      $position = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$position] = $parts[$index];
        $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    // every other format: only the number of words
    return (int)(($partCount - 1) / 2);
  }
7360
7361
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * Both strings are case-folded via UTF8::strtocasefold() and the results
   * are compared with UTF8::strcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7382
7383
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7400
7401
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are reported as equal right away. Otherwise both
   * strings are normalized to NFD before the byte-wise comparison; a string
   * that can not be normalized is compared as it is.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() returns false on failure; handing that to \strcmp() would
    // raise a TypeError because of "strict_types", so fall back to the raw input
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7420
7421
  /**
   * Find length of initial segment not matching mask.
   *
   * Null is returned if $charList is empty, if the (optionally sliced)
   * string is empty or if the slice could not be created.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // use a dedicated variable for the matches instead of overwriting the
    // integer parameter $length with an array
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      // the casts keep the documented "int|null" in case strlen() reports false
      return (int)self::strlen($matches[1]);
    }

    // no char of the mask was found: the whole string is the segment
    return (int)self::strlen($str);
  }
7455
7456
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7473
7474
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted via UTF8::chr() and the results are joined
   * in the given order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
7496
7497
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is compared against every key of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the keys (the BOM byte sequences) are needed here
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7516
7517
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // optional clean-up via UTF8::clean() before the tags are removed
    $input = ($cleanUtf8 === true ? self::clean($str) : $str);

    return \strip_tags($input, $allowable_tags);
  }
7549
7550
  /**
   * Removes every run of characters matched by the "[[:space:]]" class (in unicode mode) from the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input without whitespace; an empty string if the replacement fails.
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $withoutWhitespace = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on error, the cast turns that into ''
    return (string)$withoutWhitespace;
  }
7567
7568
  /**
   * Case-insensitive search for the first occurrence of a string inside another string.
   *
   * The lookup is tried via mbstring, then intl (UTF-8 and non-negative offset only), then the native
   * stripos() for pure ASCII input and finally via case folding + self::strpos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string to search in.</p>
   * @param string $needle    <p>The string to search for.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>If true, haystack and needle are passed through self::clean() first.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> position of the first match,<br>
   *                   or <strong>false</strong> if nothing was found or one of the strings is empty.
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // the two most common encodings are used as-is, everything else gets normalized
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2nd try: intl
    //
    // INFO: "grapheme_stripos()" can't handle other encodings and can't handle negative offset
    //

    $intlIsUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($intlIsUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3rd try: native function, for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // last resort: fold the case of both strings and do a case-sensitive search
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7641
7642
  /**
   * Returns all of haystack starting from and including the first case-insensitive occurrence of
   * needle to the end (or the part before it, see $before_needle).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>If true, haystack and needle are passed through self::clean() first.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      or one of the given strings is empty.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the cleaning above removed everything.
    // INFO: compare against '' explicitly, a loose "!$needle" would also swallow the valid needle "0"
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // $match[1] captures (non-greedy) everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // self::strlen() can return false, but self::substr() needs an integer offset
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7720
7721
  /**
   * Get the string length in characters, not the byte-length!
   *
   * The length is determined via mbstring, iconv, intl (UTF-8 only), the native strlen() for pure
   * ASCII input or a regex, whatever works first. "CP850" and "ASCII" are measured in bytes.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being measured.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>If true, the string is passed through self::clean() first.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: one byte is one character
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'] === true;
    $hasIconv = self::$SUPPORT['iconv'] === true;

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1st try: mbstring
    //

    if ($hasMbstring) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 2nd try: iconv
    //

    if ($hasIconv) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 3rd try: intl
    //
    // INFO: "grapheme_strlen()" can't handle other encodings
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    //
    // 4th try: native function, for ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // last resort: count the characters matched by a unicode-aware regex
    //

    \preg_match_all('/./us', $str, $chars);

    $length = \count($chars[0]);

    // nothing matched although the string is not empty -> the string could not be processed
    if ($length === 0 && isset($str[0])) {
      return false;
    }

    return $length;
  }
7836
7837
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str <p>The string being measured.</p>
   *
   * @return int The number of bytes.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strlen($str);
    }

    // "mb_" is available if overload is used, so use it with a 8-BIT encoding ...
    return \mb_strlen($str, 'CP850');
  }
7861
7862
  /**
   * Case-insensitive "natural order" comparison of two strings.
   *
   * INFO: both strings are case-folded via self::strtocasefold() and then handed to UTF8::strnatcmp()
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
7883
7884
  /**
   * "Natural order" comparison of two strings.
   *
   * INFO: identical strings are reported as equal right away, everything else is passed through
   *       self::strtonatfold() and compared via the native strnatcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7903
7904
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * INFO: both strings are case-folded via self::strtocasefold() and then handed to UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The number of characters to use in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
7927
7928
  /**
   * Comparison of the first n characters of two strings.
   *
   * INFO: both strings are cut via self::substr() and then handed to self::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a failed self::substr() call (false) into an empty string
    $prefix1 = (string)self::substr($str1, 0, $len);
    $prefix2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
7949
7950
  /**
   * Search a string for any character out of a given list of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string to search in.</p>
   * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
   *
   * @return string|false
   *                      The rest of the haystack, starting from the first character found,<br>
   *                      or <strong>false</strong> if nothing was found or one of the strings is empty.
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $match)) {
      return false;
    }

    // locate the matched character in the haystack and return everything from there on
    $start = (int)\strpos($haystack, $match[0]);

    return \substr($haystack, $start);
  }
7972
7973
  /**
   * Find position of first occurrence of string in a string.
   *
   * The lookup is tried via mbstring, intl (UTF-8 and non-negative offset only), iconv (non-negative
   * offset only), the native strpos() for pure ASCII input and finally via a byte search whose
   * result is converted back into a character position. "CP850" and "ASCII" are searched byte-wise.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>If true, haystack and needle are passed through self::clean() first.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found (or one of the strings is empty) it returns false.
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset is counted from the end of the haystack: translate it into the equivalent
    // non-negative offset first, otherwise the position found in the sliced haystack could not be
    // converted back into a position which is relative to the start of the original haystack.
    if ($offset < 0) {
      $haystackLength = $haystackIsAscii ? \strlen($haystack) : self::strlen($haystack, $encoding);
      // self::strlen() can return false -> the cast makes us search from the start in that case
      $offset = \max(0, (int)$haystackLength + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle within the sliced haystack
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos === 0) {
      return $offset;
    }

    // convert the byte position into a character position and re-add the skipped characters
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
  }
8123
8124
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to search for in the haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found or one of the strings is empty, it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with a 8-BIT encoding ...
    return \mb_strpos($haystack, $needle, $offset, 'CP850');
  }
8157
8158
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * INFO: mbstring is used if it is available, otherwise only the first character of the needle
   *       is searched (via the native strrchr(), iconv or self::strrpos()).
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>If true, haystack and needle are passed through self::clean() first.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // 2nd try: native function, for binary || ascii only
    //

    $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($before_needle === false && $isByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // last resort: search the first character of the needle via iconv or vanilla php
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $pos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $pos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($pos === false) {
      return false;
    }

    if ($before_needle) {
      return self::substr($haystack, 0, $pos, $encoding);
    }

    return self::substr($haystack, $pos, null, $encoding);
  }
8271
8272
  /**
   * Reverses the order of the characters in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with its characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $result = '';

    // walk from the last character to the first one and collect them one by one
    // INFO: self::strlen() can return false, the cast makes the loop a no-op in that case
    for ($index = (int)self::strlen($str) - 1; $index >= 0; --$index) {
      $result .= self::substr($str, $index, 1);
    }

    return $result;
  }
8293
8294
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * INFO: mbstring is used if it is available, otherwise the first character of the needle
   *       is searched via self::strripos().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>If true, haystack and needle are passed through self::clean() first.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // last resort: search the first character of the needle via vanilla php
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }

    $pos = self::strripos($haystack, (string)$firstChar, 0, $encoding);
    if ($pos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
  }
8364
8365
  /**
   * Find the position of the last case-insensitive occurrence of a string.
   *
   * @param string     $haystack  <p>The string to search in.</p>
   * @param string|int $needle    <p>The string to search for. A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> position of the last occurrence of needle in the haystack,<br>
   *                   or <strong>false</strong> if haystack / needle is empty or needle is not found.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support an integer $needle, so turn it into a character first
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: search byte-wise
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    // INFO: "grapheme_strripos()" can't handle other encodings nor a negative offset
    $intlIsUsable = self::$SUPPORT['intl'] === true && $encoding === 'UTF-8' && $offset >= 0;
    if ($intlIsUsable) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings, then search case-sensitively
    //

    return self::strrpos(
        self::strtocasefold($haystack, true, false, $encoding),
        self::strtocasefold($needle, true, false, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8470
8471
  /**
   * Byte-wise, case-insensitive search for the last occurrence of a string within another.
   *
   * @param string $haystack <p>
   *                         The string from which to get the position of the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack
   *                         to start searching.
   *                         </p>
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack,
   *                   or false if haystack / needle is empty or needle is not found.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset,
    // otherwise the native function is used directly
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
  }
8506
8507
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <strong>null</strong> (the default) is passed as 0 to the
   *                              byte-wise and mbstring searches.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      // strrpos_in_byte() only accepts an int offset, so the null default must become 0 here
      return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // never hand null to mb_strrpos(): with strict types newer PHP versions reject it for the int offset
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    $haystackTmp = null;
    if ($offset > 0) {
      // positive offset: drop the leading characters, the offset is added back to the result below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // negative offset: drop the trailing characters, positions stay relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position by measuring the prefix before the match
    return (int)$offset + (int)self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8642
8643
  /**
   * Byte-wise search for the last occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack,
   *                   or false if haystack / needle is empty or needle is not found.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset,
    // otherwise the native function is used directly
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
  }
8677
8678
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start of the part of the string that is examined.</p>
   * @param int|null $length [optional] <p>Length of the part of the string that is examined.</p>
   *
   * @return int
   *             The length of the matching initial segment,<br>
   *             or 0 if the string or the mask is empty or nothing matches.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      // only look at the requested part of the string
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the input string
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // the cast keeps the declared "int" return type even if strlen() reports a failure (false)
    return (int)self::strlen($matches[0]);
  }
8705
8706
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if haystack / needle is empty
   *                       or needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // capture everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // strlen() can report a failure (false), which must not be passed on as the int offset of substr()
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
8809
8810
  /**
   * Byte-wise search for the first occurrence of a string within another.
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end.
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if haystack / needle is empty or needle is not found.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset,
    // otherwise the native function is used directly
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
  }
8849
8850
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string The case-folded string; an empty input is returned as an empty string.
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ('' === $str) {
      return '';
    }

    // first replace the special case-folding characters ...
    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // ... then convert the whole string into the requested case
    return $lower === true
        ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
  }
8889
8890
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean the input before it is handed to the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator instead
          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if (\is_string($transliterated)) {
          return $transliterated;
        }

        // the transliterator failed (false): use the generic conversion below,
        // so that the declared "string" return type is always respected
      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8952
8953
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The folded string, or an empty string if the input could not be normalized or processed.
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
      // the normalizer reports a failure with "false", which must not be passed on as regex subject
      return '';
    }

    // preg_replace() returns null on error, the cast keeps the declared "string" return type
    return (string)\preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
8965
8966
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean the input before it is handed to the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available in this branch, only the requested language is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator instead
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if (\is_string($transliterated)) {
          return $transliterated;
        }

        // the transliterator failed (false): use the generic conversion below,
        // so that the declared "string" return type is always respected
      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
9028
9029
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                A copy of str with the replacements applied:<br>
   *                with "to", each character of "from" is mapped to the character at the same position in "to"
   *                (the longer one is truncated);<br>
   *                without "to", a string "from" is removed from str and an array "from" is used as replace-pairs.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // nothing to do if source and target are the same
    if ($from === $to) {
      return $str;
    }

    // no "to" given: "from" is either a string to remove or an array of replace-pairs
    if (INF === $to) {
      return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);
    $fromCount = \count($fromChars);
    $toCount = \count($toChars);

    // both lists need the same length, so cut the longer one
    if ($fromCount > $toCount) {
      $fromChars = \array_slice($fromChars, 0, $toCount);
    } elseif ($fromCount < $toCount) {
      $toChars = \array_slice($toChars, 0, $fromCount);
    }

    return \strtr($str, \array_combine($fromChars, $toChars));
  }
9073
9074
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int The width of the string; in the fallback without mbstring the characters of the
   *             wide ranges count twice and every remaining character counts once.
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    //
    // fallback via vanilla php
    //

    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip all wide characters and let preg_replace() count how many were removed
    $wideCount = 0;
    $narrowPart = (string)\preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    // every wide character counts twice, everything else once
    return ($wideCount << 1) + self::strlen($narrowPart, 'UTF-8');
  }
9124
9125
  /**
9126
   * Get part of a string.
9127
   *
9128
   * @link http://php.net/manual/en/function.mb-substr.php
9129
   *
9130
   * @param string $str       <p>The string being checked.</p>
9131
   * @param int    $offset    <p>The first position used in str.</p>
9132
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9133
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9134
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9135
   *
9136
   * @return string|false
9137
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9138
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9139
   *                      characters long, <b>FALSE</b> will be returned.
9140
   */
9141 400
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9142
  {
9143 400
    if ('' === $str) {
9144 19
      return '';
9145
    }
9146
9147
    // Empty string
9148 394
    if ($length === 0) {
9149 20
      return '';
9150
    }
9151
9152 391
    if ($cleanUtf8 === true) {
9153
      // iconv and mbstring are not tolerant to invalid encoding
9154
      // further, their behaviour is inconsistent with that of PHP's substr
9155 2
      $str = self::clean($str);
9156
    }
9157
9158
    // Whole string
9159 391
    if (!$offset && $length === null) {
9160 38
      return $str;
9161
    }
9162
9163 362
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9164 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9165
    }
9166
9167 362
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9168
      self::checkForSupport();
9169
    }
9170
9171
    //
9172
    // fallback for binary || ascii only
9173
    //
9174
9175
    if (
9176 362
        $encoding === 'CP850'
9177
        ||
9178 362
        $encoding === 'ASCII'
9179
    ) {
9180 3
      return self::substr_in_byte($str, $offset, $length);
9181
    }
9182
9183
    //
9184
    // fallback via mbstring
9185
    //
9186
9187 359
    if (self::$SUPPORT['mbstring'] === true) {
9188 359
      $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9189 359
      if ($return !== false) {
9190 354
        return $return;
9191
      }
9192
    }
9193
9194
    // otherwise we need the string-length and can't fake it via "2147483647"
9195 29
    $str_length = 0;
9196 29
    if ($offset || $length === null) {
9197 29
      $str_length = self::strlen($str, $encoding);
9198
    }
9199
9200
    // e.g.: invalid chars + mbstring not installed
9201 29
    if ($str_length === false) {
9202
      return false;
9203
    }
9204
9205
    // Empty string
9206 29
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9207 21
      return '';
9208
    }
9209
9210
    // Impossible
9211 8
    if ($offset && $offset > $str_length) {
9212
      // "false" is the php native return type here,
9213
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9214 3
      return '';
9215
9216
    }
9217
9218 5
    if ($length === null) {
9219
      $length = (int)$str_length;
9220
    } else {
9221 5
      $length = (int)$length;
9222
    }
9223
9224
    if (
9225 5
        $encoding !== 'UTF-8'
9226
        &&
9227 5
        self::$SUPPORT['mbstring'] === false
9228
    ) {
9229
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9230
    }
9231
9232
    //
9233
    // fallback via intl
9234
    //
9235
9236
    if (
9237 5
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9238
        &&
9239 5
        $offset >= 0 // grapheme_substr() can't handle negative offset
9240
        &&
9241 5
        self::$SUPPORT['intl'] === true
9242
    ) {
9243 5
      $returnTmp = \grapheme_substr($str, $offset, $length);
9244 5
      if ($returnTmp !== false) {
9245
        return $returnTmp;
9246
      }
9247
    }
9248
9249
    //
9250
    // fallback via iconv
9251
    //
9252
9253
    if (
9254 5
        $length >= 0 // "iconv_substr()" can't handle negative length
9255
        &&
9256 5
        self::$SUPPORT['iconv'] === true
9257
    ) {
9258 5
      $returnTmp = \iconv_substr($str, $offset, $length);
9259 5
      if ($returnTmp !== false) {
9260 5
        return $returnTmp;
9261
      }
9262
    }
9263
9264
    //
9265
    // fallback for ascii only
9266
    //
9267
9268
    if (self::is_ascii($str)) {
9269
      return \substr($str, $offset, $length);
9270
    }
9271
9272
    //
9273
    // fallback via vanilla php
9274
    //
9275
9276
    // split to array, and remove invalid characters
9277
    $array = self::split($str);
9278
9279
    // extract relevant part, and join to make sting again
9280
    return \implode('', \array_slice($array, $offset, $length));
9281
  }
9282
9283
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * <p>If an offset or a length is given, $str1 is first reduced to the requested part
   * and $str2 is then cut to the character-length of that part, before both are compared.</p>
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // "self::strlen()" can return false, but "self::substr()" only accepts int|null as length
      // (with strict types a "false" would end in a TypeError), so we treat "false" as 0
      $str1Length = self::strlen($str1);
      if ($str1Length === false) {
        $str1Length = 0;
      }

      $str2Tmp = self::substr($str2, 0, (int)$str1Length);
      if ($str2Tmp === false) {
        $str2Tmp = '';
      }
      $str2 = (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9326
9327
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. It outputs a warning if the offset plus the length is
   *                           greater than the haystack length.
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The number of occurrences, or <strong>false</strong> if $haystack or $needle is empty
   *                   or if the length of the haystack can't be determined.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the requested encoding, otherwise the slice
        // below can be cut too short for non UTF-8 input
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // only the requested part of the haystack is searched
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences from both inputs, before we count
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: count the (quoted) needle in unicode-mode
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9420
9421
  /**
   * Count the number of substring occurrences, processed in bytes.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string being found.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. It outputs a warning if the offset plus the length is
   *                         greater than the haystack length.
   *                         </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string (0 if $haystack or $needle is empty).
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...

      if ($offset || $length !== null) {

        if ($length === null) {
          // we need the length in bytes here (8-BIT), not in characters,
          // otherwise multi-byte input would be cut too short by "substr_in_byte()"
          $lengthTmp = \mb_strlen($haystack, 'CP850');
          if ($lengthTmp === false) {
            return false;
          }
          $length = (int)$lengthTmp;
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
        if ($haystackTmp === false) {
          $haystackTmp = '';
        }
        $haystack = (string)$haystackTmp;
      }

      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // the native "substr_count()" accepts "null" as length only since PHP 8,
    // so we leave the argument out if no length was requested
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9495
9496
  /**
   * Counts how often $substring occurs in $str.
   *
   * <p>The search is case-sensitive by default, pass "false" as $caseSensitive
   * to compare the case-folded versions of both strings instead.</p>
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, 0 if one of the strings is empty.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $str || '' === $substring) {
      return 0;
    }

    // BC-layer for the api: a non-boolean third argument is used as the encoding
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    if ((bool)$caseSensitive === false) {
      // case-insensitive: fold both strings, before we count
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9526
9527
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack
   *                if it does not start with $needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" can return false, but the offset of "self::substr()" must be an integer
    // (TypeError with strict types), so without a length we can't cut anything
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }

    return (string)$haystackTmp;
  }
9555
9556
  /**
   * Returns a part of the string, where offset and length are counted in bytes.
   *
   * @param string $str    <p>The string being checked.</p>
   * @param int    $offset <p>The first position used in str.</p>
   * @param int    $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to cut from, or nothing was requested
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset + no length => the whole string
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "no length" is faked via the biggest 32-bit integer
    $maxLength = $length ?? 2147483647;

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_substr($str, $offset, $maxLength, 'CP850')
        : \substr($str, $offset, $maxLength);
  }
9595
9596
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the suffix, or the unchanged haystack
   *                if it does not end with $needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" can return false, in that case the arithmetic below
    // would silently work with 0, so without both lengths we can't cut anything
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }

    return (string)$haystackTmp;
  }
9624
9625
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack
   *                if it does not start with $needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" can return false, but the offset of "self::substr()" must be an integer
    // (TypeError with strict types), so without a length we can't cut anything
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }

    return (string)$haystackTmp;
  }
9653
9654
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * <p>If $str is an array, every element is processed on its own (with the requested encoding)
   * and an array is returned; $replacement, $offset and $length are then used per element.</p>
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   * @param string          $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          // non-integer offsets are replaced by 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            // non-integer lengths are replaced by the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call (per element), the closure keeps the requested encoding,
      // a plain callable would silently fall back to "UTF-8"
      return \array_map(
          static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
            return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
          },
          $str,
          $replacement,
          $offset,
          $length
      );
    }

    // a single input string only uses the first replacement
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // ascii only: the native function is enough
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $string_length = self::strlen($str, $encoding);

      // normalize the offset into the range 0 ... string-length
      if ($offset < 0) {
        $offset = \max(0, $string_length + $offset);
      } elseif ($offset > $string_length) {
        $offset = $string_length;
      }

      // normalize the length: negative values count from the end of the string
      if ($length < 0) {
        $length = \max(0, $string_length - $offset + $length);
      } elseif ($length === null || $length > $string_length) {
        $length = $string_length;
      }

      if (($offset + $length) > $string_length) {
        $length = $string_length - $offset;
      }

      // "self::substr()" can return false, the explicit cast turns that into an empty string
      $before = (string)self::substr($str, 0, $offset, $encoding);
      $after = (string)self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);

      return $before . $replacement . $after;
    }

    // fallback via regex: split both strings into single characters
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $lengthTmp = self::strlen($str, $encoding);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
9787
9788
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the suffix, or the unchanged haystack
   *                if it does not end with $needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" can return false, in that case the arithmetic below
    // would silently work with 0, so without both lengths we can't cut anything
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }

    return (string)$haystackTmp;
  }
9816
9817
  /**
   * Swaps the case of the string: lower-case chars become upper-case and vice versa.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences, before we convert the case
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: "lower ^ upper ^ original" leaves the other case-variant of every byte
    // NOTE(review): presumably this relies on both conversions keeping the byte-length of $str
    //               (the result of "^" is as long as the shortest operand) - confirm e.g. for "ß"
    return (string)($lower ^ $upper ^ $str);
  }
9844
9845
  /**
   * Checks whether "mbstring" or "iconv" functions exist although the matching
   * php-extension is not loaded, which is taken as a sign that a polyfill provides them.
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" exists without the "mbstring" extension
   *              or "iconv()" exists without the "iconv" extension, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringWithoutExtension = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvWithoutExtension = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringWithoutExtension || $iconvWithoutExtension;
  }
9868
9869
  /**
   * Replaces every tab in the string with $tabLength spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces used for one tab. Default: 4</p>
   *
   * @return string The string with tabs replaced by spaces.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9879
9880
  /**
   * Title-cases the string: the first character of each word is converted to
   * uppercase and all other chars to lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // no extension check here: "mb_convert_case()" is expected to be always available
    // (native or via the symfony polyfill)
    return \mb_convert_case($str, MB_CASE_TITLE, $encoding);
  }
9898
9899
  /**
   * Deprecated alias, which only forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed as second argument to "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed as third argument to "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9916
9917
  /**
   * Deprecated alias, which only forwards the input to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9932
9933
  /**
   * Deprecated alias, which only forwards the input to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9948
9949
  /**
   * Deprecated alias, which only forwards the input to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9964
9965
  /**
9966
   * Convert a string into ASCII.
9967
   *
9968
   * @param string $str     <p>The input string.</p>
9969
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9970
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9971
   *                        performance</p>
9972
   *
9973
   * @return string
9974
   */
9975 38
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9976
  {
9977 38
    static $UTF8_TO_ASCII;
9978
9979 38
    if ('' === $str) {
9980 3
      return '';
9981
    }
9982
9983
    // check if we only have ASCII, first (better performance)
9984 35
    if (self::is_ascii($str) === true) {
9985 7
      return $str;
9986
    }
9987
9988 30
    $str = self::clean(
9989 30
        $str,
9990 30
        true,
9991 30
        true,
9992 30
        true,
9993 30
        false,
9994 30
        true,
9995 30
        true
9996
    );
9997
9998
    // check again, if we only have ASCII, now ...
9999 30
    if (self::is_ascii($str) === true) {
10000 12
      return $str;
10001
    }
10002
10003 19
    if ($strict === true) {
10004
10005 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10006
        self::checkForSupport();
10007
      }
10008
10009 1
      if (self::$SUPPORT['intl'] === true) {
10010
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
10011
        /** @noinspection PhpComposerExtensionStubsInspection */
10012 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
10013
10014
        // check again, if we only have ASCII, now ...
10015 1
        if (self::is_ascii($str) === true) {
10016 1
          return $str;
10017
        }
10018
10019
      }
10020
    }
10021
10022 19
    if (self::$ORD === null) {
10023
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10024
    }
10025
10026 19
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
10027 19
    $chars = $ar[0];
10028 19
    $ord = null;
10029 19
    foreach ($chars as &$c) {
10030
10031 19
      $ordC0 = self::$ORD[$c[0]];
10032
10033 19
      if ($ordC0 >= 0 && $ordC0 <= 127) {
10034 15
        continue;
10035
      }
10036
10037 19
      $ordC1 = self::$ORD[$c[1]];
10038
10039
      // ASCII - next please
10040 19
      if ($ordC0 >= 192 && $ordC0 <= 223) {
10041 17
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
10042
      }
10043
10044 19
      if ($ordC0 >= 224) {
10045 8
        $ordC2 = self::$ORD[$c[2]];
10046
10047 8
        if ($ordC0 <= 239) {
10048 7
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
10049
        }
10050
10051 8
        if ($ordC0 >= 240) {
10052 2
          $ordC3 = self::$ORD[$c[3]];
10053
10054 2
          if ($ordC0 <= 247) {
10055 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
10056
          }
10057
10058 2
          if ($ordC0 >= 248) {
10059
            $ordC4 = self::$ORD[$c[4]];
10060
10061
            if ($ordC0 <= 251) {
10062
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
10063
            }
10064
10065
            if ($ordC0 >= 252) {
10066
              $ordC5 = self::$ORD[$c[5]];
10067
10068
              if ($ordC0 <= 253) {
10069
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10070
              }
10071
            }
10072
          }
10073
        }
10074
      }
10075
10076 19
      if ($ordC0 === 254 || $ordC0 === 255) {
10077
        $c = $unknown;
10078
        continue;
10079
      }
10080
10081 19
      if ($ord === null) {
10082
        $c = $unknown;
10083
        continue;
10084
      }
10085
10086 19
      $bank = $ord >> 8;
10087 19
      if (!isset($UTF8_TO_ASCII[$bank])) {
10088 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10089 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10090 2
          $UTF8_TO_ASCII[$bank] = [];
10091
        }
10092
      }
10093
10094 19
      $newchar = $ord & 255;
10095
10096 19
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10097
10098
        // keep for debugging
10099
        /*
10100
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10101
        echo "char: " . $c . "\n";
10102
        echo "ord: " . $ord . "\n";
10103
        echo "newchar: " . $newchar . "\n";
10104
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10105
        echo "bank:" . $bank . "\n\n";
10106
        */
10107
10108 18
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10109
      } else {
10110
10111
        // keep for debugging missing chars
10112
        /*
10113
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10114
        echo "char: " . $c . "\n";
10115
        echo "ord: " . $ord . "\n";
10116
        echo "newchar: " . $newchar . "\n";
10117
        echo "bank:" . $bank . "\n\n";
10118
        */
10119
10120 19
        $c = $unknown;
10121
      }
10122
    }
10123
10124 19
    return \implode('', $chars);
10125
  }
10126
10127
  /**
   * Interpret an arbitrary value as a boolean.
   *
   * The value is cast to string first. An empty string is false; the words "true", "1", "on", "yes" and
   * "false", "0", "off", "no" are matched case-insensitively; any other numeric string is true only if it is
   * greater than zero; everything else is true if something is left after "UTF8::trim()".
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;
    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $lowered = \strtolower($value);

    if (\in_array($lowered, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($lowered, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      // negative numbers and zero count as false
      return ((float)$value + 0) > 0;
    }

    // non-numeric leftovers: true as soon as anything survives trimming
    return (bool)self::trim($value);
  }
10166
10167
  /**
   * Convert given string to safe filename (and keep string case).
   *
   * Every byte that is not "a-z", "A-Z", "0-9", ".", "-", whitespace or part of the fallback char is removed,
   * each run of whitespace is replaced by the fallback char, repeated fallback chars are collapsed into one and
   * the fallback char is finally trimmed from both ends of the result.
   *
   * @param string $string            <p>The input string.</p>
   * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe characters
   *                                  are simply removed. Set to true to run "UTF8::str_transliterate()" first.</p>
   * @param string $fallback_char     <p>Replacement for whitespace. May be empty: whitespace is then removed.</p>
   *
   * @return string
   */
  public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
  {
    if ($use_transliterate === true) {
      $string = self::str_transliterate($string, $fallback_char);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $patterns = [
        '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
        '/[\s]+/',                                            // 2) convert spaces to $fallback_char
    ];
    $replacements = [
        '',
        $fallback_char,
    ];

    // An empty fallback char would produce the invalid pattern "/[]+/"; preg_replace() then fails,
    // returns null and the whole filename would silently collapse to an empty string.
    if ($fallback_char !== '') {
      $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
      $replacements[] = $fallback_char;
    }

    $string = (string)\preg_replace($patterns, $replacements, $string);

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
  }
10202
10203
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (keys are kept, nested arrays are handled recursively).
   * Everything else is cast to string and passed to "UTF8::utf8_decode()"; an empty string stays empty.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
10227
10228
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged and the result is returned as-is.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
10241
10242
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>The input is scanned byte by byte: a lead byte followed by the matching number of continuation bytes
   * (2-, 3- or 4-byte form) is copied unchanged, every other byte >= 0x80 is converted on its own.</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded afterwards, html-entities only on request.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // a stray continuation byte (10xxxxxx) needs conversion, anything else is copied as it is
        $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
        $pos++;

        continue;
      }

      // how many continuation bytes would a sequence starting with this lead byte need?
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; bytes behind the end of the string count as "\x00" (= invalid)
      $tail = '';
      for ($offset = 1; $offset <= $tailLength; $offset++) {
        $next = $pos + $offset < $byteCount ? $str[$pos + $offset] : "\x00";

        if ($next < "\x80" || $next > "\xBF") {
          $tail = '';

          break;
        }

        $tail .= $next;
      }

      if ($tail !== '') {
        // yeah, almost sure it's UTF8 already
        $result .= $lead . $tail;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert only the lead byte, the following bytes are inspected on their own
        $result .= self::to_utf8_convert_helper($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
10351
10352
  /**
   * Convert one non-UTF-8 byte into a UTF-8 sequence.
   *
   * The byte is first looked up in the "win1252_to_utf8" table; if it is not listed there, a 2-byte UTF-8
   * sequence is built from its bits (110000xx 10xxxxxx).
   *
   * @param int|string $input <p>The byte to convert (callers in this class pass a single-byte string).</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // Lazy-load the lookup tables. "getData()" can return false; never store that in the
    // properties, they are declared as "null|array".
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord') ?: [];
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr') ?: [];
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8') ?: [];
    }

    // fall back to the native function if the table has no entry (e.g. the data could not be loaded)
    $ordC1 = self::$ORD[$input] ?? \ord((string)$input);

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Use an integer shift instead of "/ 64": the division yields a float, and a float with a
    // fractional part as array key is an implicit, lossy conversion (deprecated since PHP 8.1).
    $highBits = $ordC1 >> 6;

    // string-wise bit operations: build "110000xx" and "10xxxxxx"
    $cc1 = (self::$CHR[$highBits] ?? \chr($highBits)) | "\xC0";
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10385
10386
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time, than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped. If omitted (or null, false, an empty
   *                      string or an empty array) all separator and control characters are stripped.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Only "no char list given" selects the default pattern. A plain "!$chars" check would also catch
    // the char list "0", so that e.g. trim('0abc0', '0') stripped whitespace instead of zeros.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === [];

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // cast first: the file runs with strict_types, so e.g. an integer 0 must become "0" for preg_quote()
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10415
10416
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the rest; only the first character is passed to
   * "UTF8::strtoupper()", the rest is appended unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything behind the first character ("false" means: there is nothing left)
    $tail = self::substr($str, 1, null, $encoding);

    $head = self::strtoupper((string)self::substr($str, 0, 1, $encoding), $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
10446
10447
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperCasedFirst = self::ucfirst($str, $encoding, $cleanUtf8);

    return $upperCasedFirst;
  }
10462
10463
  /**
   * Uppercase for all words in the string.
   *
   * Pure ASCII input without exceptions and without an extra char list is handled by PHP's native "ucwords()".
   * Otherwise the string is split via "UTF8::str_to_words()", every word that is not listed in $exceptions is
   * passed to "UTF8::ucfirst()" and the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare with '' instead of using "!$str": the string "0" is falsy in PHP, but it is not empty
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function is only an option if neither a char list nor any exception was given
    // (again checked against '', so that a char list / an exception of "0" is not ignored)
    $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only - a word like "0" must stay in the result
      if ('' === $word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10531
10532
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Each round converts the string via "UTF8::to_utf8()", decodes html-entities, url-decodes the result and
   * passes it to "UTF8::fix_simple_utf8()". With $multi_decode the rounds are repeated until the string does
   * not change anymore.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $previous = $str;

      $asUtf8 = self::to_utf8($previous);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // one round only, or nothing left to decode
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return $str;
  }
10580
10581
  /**
10582
   * Returns an array that maps url-encoded bytes ("%20" - "%FF") to their decoded characters (win1252 -> UTF-8).
10583
   *
10584
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10585
   *
10586
   * @return string[]
10587
   */
10588 2
  public static function urldecode_fix_win1252_chars(): array
10589
  {
10590
    return [
10591 2
        '%20' => ' ',
10592
        '%21' => '!',
10593
        '%22' => '"',
10594
        '%23' => '#',
10595
        '%24' => '$',
10596
        '%25' => '%',
10597
        '%26' => '&',
10598
        '%27' => "'",
10599
        '%28' => '(',
10600
        '%29' => ')',
10601
        '%2A' => '*',
10602
        '%2B' => '+',
10603
        '%2C' => ',',
10604
        '%2D' => '-',
10605
        '%2E' => '.',
10606
        '%2F' => '/',
10607
        '%30' => '0',
10608
        '%31' => '1',
10609
        '%32' => '2',
10610
        '%33' => '3',
10611
        '%34' => '4',
10612
        '%35' => '5',
10613
        '%36' => '6',
10614
        '%37' => '7',
10615
        '%38' => '8',
10616
        '%39' => '9',
10617
        '%3A' => ':',
10618
        '%3B' => ';',
10619
        '%3C' => '<',
10620
        '%3D' => '=',
10621
        '%3E' => '>',
10622
        '%3F' => '?',
10623
        '%40' => '@',
10624
        '%41' => 'A',
10625
        '%42' => 'B',
10626
        '%43' => 'C',
10627
        '%44' => 'D',
10628
        '%45' => 'E',
10629
        '%46' => 'F',
10630
        '%47' => 'G',
10631
        '%48' => 'H',
10632
        '%49' => 'I',
10633
        '%4A' => 'J',
10634
        '%4B' => 'K',
10635
        '%4C' => 'L',
10636
        '%4D' => 'M',
10637
        '%4E' => 'N',
10638
        '%4F' => 'O',
10639
        '%50' => 'P',
10640
        '%51' => 'Q',
10641
        '%52' => 'R',
10642
        '%53' => 'S',
10643
        '%54' => 'T',
10644
        '%55' => 'U',
10645
        '%56' => 'V',
10646
        '%57' => 'W',
10647
        '%58' => 'X',
10648
        '%59' => 'Y',
10649
        '%5A' => 'Z',
10650
        '%5B' => '[',
10651
        '%5C' => '\\',
10652
        '%5D' => ']',
10653
        '%5E' => '^',
10654
        '%5F' => '_',
10655
        '%60' => '`',
10656
        '%61' => 'a',
10657
        '%62' => 'b',
10658
        '%63' => 'c',
10659
        '%64' => 'd',
10660
        '%65' => 'e',
10661
        '%66' => 'f',
10662
        '%67' => 'g',
10663
        '%68' => 'h',
10664
        '%69' => 'i',
10665
        '%6A' => 'j',
10666
        '%6B' => 'k',
10667
        '%6C' => 'l',
10668
        '%6D' => 'm',
10669
        '%6E' => 'n',
10670
        '%6F' => 'o',
10671
        '%70' => 'p',
10672
        '%71' => 'q',
10673
        '%72' => 'r',
10674
        '%73' => 's',
10675
        '%74' => 't',
10676
        '%75' => 'u',
10677
        '%76' => 'v',
10678
        '%77' => 'w',
10679
        '%78' => 'x',
10680
        '%79' => 'y',
10681
        '%7A' => 'z',
10682
        '%7B' => '{',
10683
        '%7C' => '|',
10684
        '%7D' => '}',
10685
        '%7E' => '~',
10686
        '%7F' => '',
10687
        // NOTE(review): byte 0x80 is the euro sign in Windows-1252; the backtick below looks unintended - confirm
        '%80' => '`',
10688
        '%81' => '',
10689
        '%82' => '‚',
10690
        '%83' => 'ƒ',
10691
        '%84' => '„',
10692
        '%85' => '…',
10693
        '%86' => '†',
10694
        '%87' => '‡',
10695
        '%88' => 'ˆ',
10696
        '%89' => '‰',
10697
        '%8A' => 'Š',
10698
        '%8B' => '‹',
10699
        '%8C' => 'Œ',
10700
        '%8D' => '',
10701
        '%8E' => 'Ž',
10702
        '%8F' => '',
10703
        '%90' => '',
10704
        '%91' => '‘',
10705
        '%92' => '’',
10706
        '%93' => '“',
10707
        '%94' => '”',
10708
        '%95' => '•',
10709
        '%96' => '–',
10710
        '%97' => '—',
10711
        '%98' => '˜',
10712
        '%99' => '™',
10713
        '%9A' => 'š',
10714
        '%9B' => '›',
10715
        '%9C' => 'œ',
10716
        '%9D' => '',
10717
        '%9E' => 'ž',
10718
        '%9F' => 'Ÿ',
10719
        '%A0' => '',
10720
        '%A1' => '¡',
10721
        '%A2' => '¢',
10722
        '%A3' => '£',
10723
        '%A4' => '¤',
10724
        '%A5' => '¥',
10725
        '%A6' => '¦',
10726
        '%A7' => '§',
10727
        '%A8' => '¨',
10728
        '%A9' => '©',
10729
        '%AA' => 'ª',
10730
        '%AB' => '«',
10731
        '%AC' => '¬',
10732
        '%AD' => '',
10733
        '%AE' => '®',
10734
        '%AF' => '¯',
10735
        '%B0' => '°',
10736
        '%B1' => '±',
10737
        '%B2' => '²',
10738
        '%B3' => '³',
10739
        '%B4' => '´',
10740
        '%B5' => 'µ',
10741
        '%B6' => '¶',
10742
        '%B7' => '·',
10743
        '%B8' => '¸',
10744
        '%B9' => '¹',
10745
        '%BA' => 'º',
10746
        '%BB' => '»',
10747
        '%BC' => '¼',
10748
        '%BD' => '½',
10749
        '%BE' => '¾',
10750
        '%BF' => '¿',
10751
        '%C0' => 'À',
10752
        '%C1' => 'Á',
10753
        '%C2' => 'Â',
10754
        '%C3' => 'Ã',
10755
        '%C4' => 'Ä',
10756
        '%C5' => 'Å',
10757
        '%C6' => 'Æ',
10758
        '%C7' => 'Ç',
10759
        '%C8' => 'È',
10760
        '%C9' => 'É',
10761
        '%CA' => 'Ê',
10762
        '%CB' => 'Ë',
10763
        '%CC' => 'Ì',
10764
        '%CD' => 'Í',
10765
        '%CE' => 'Î',
10766
        '%CF' => 'Ï',
10767
        '%D0' => 'Ð',
10768
        '%D1' => 'Ñ',
10769
        '%D2' => 'Ò',
10770
        '%D3' => 'Ó',
10771
        '%D4' => 'Ô',
10772
        '%D5' => 'Õ',
10773
        '%D6' => 'Ö',
10774
        '%D7' => '×',
10775
        '%D8' => 'Ø',
10776
        '%D9' => 'Ù',
10777
        '%DA' => 'Ú',
10778
        '%DB' => 'Û',
10779
        '%DC' => 'Ü',
10780
        '%DD' => 'Ý',
10781
        '%DE' => 'Þ',
10782
        '%DF' => 'ß',
10783
        '%E0' => 'à',
10784
        '%E1' => 'á',
10785
        '%E2' => 'â',
10786
        '%E3' => 'ã',
10787
        '%E4' => 'ä',
10788
        '%E5' => 'å',
10789
        '%E6' => 'æ',
10790
        '%E7' => 'ç',
10791
        '%E8' => 'è',
10792
        '%E9' => 'é',
10793
        '%EA' => 'ê',
10794
        '%EB' => 'ë',
10795
        '%EC' => 'ì',
10796
        '%ED' => 'í',
10797
        '%EE' => 'î',
10798
        '%EF' => 'ï',
10799
        '%F0' => 'ð',
10800
        '%F1' => 'ñ',
10801
        '%F2' => 'ò',
10802
        '%F3' => 'ó',
10803
        '%F4' => 'ô',
10804
        '%F5' => 'õ',
10805
        '%F6' => 'ö',
10806
        '%F7' => '÷',
10807
        '%F8' => 'ø',
10808
        '%F9' => 'ù',
10809
        '%FA' => 'ú',
10810
        '%FB' => 'û',
10811
        '%FC' => 'ü',
10812
        '%FD' => 'ý',
10813
        '%FE' => 'þ',
10814
        '%FF' => 'ÿ',
10815
    ];
10816
  }
10817
10818
  /**
10819
   * Decodes a UTF-8 string to ISO-8859-1.
10820
   *
10821
   * @param string $str <p>The input string.</p>
10822
   * @param bool   $keepUtf8Chars
10823
   *
10824
   * @return string
10825
   */
10826 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10827
  {
10828 14
    if ('' === $str) {
10829 5
      return '';
10830
    }
10831
10832 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10833 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10834
10835 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10836
10837 1
      if (self::$WIN1252_TO_UTF8 === null) {
10838
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10839
      }
10840
10841 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10841
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10842 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10842
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10843
    }
10844
10845
    /** @noinspection PhpInternalEntityUsedInspection */
10846 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10847
10848 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10849
      self::checkForSupport();
10850
    }
10851
10852
    // save for later comparision
10853 14
    $str_backup = $str;
10854 14
    $len = self::strlen_in_byte($str);
10855
10856 14
    if (self::$ORD === null) {
10857
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10858
    }
10859
10860 14
    if (self::$CHR === null) {
10861
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10862
    }
10863
10864 14
    $noCharFound = '?';
10865
    /** @noinspection ForeachInvariantsInspection */
10866 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10867 14
      switch ($str[$i] & "\xF0") {
10868 14
        case "\xC0":
10869 12
        case "\xD0":
10870 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10871 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10872 14
          break;
10873
10874
        /** @noinspection PhpMissingBreakStatementInspection */
10875 12
        case "\xF0":
10876
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10877 12
        case "\xE0":
10878 10
          $str[$j] = $noCharFound;
10879 10
          $i += 2;
10880 10
          break;
10881
10882
        default:
10883 12
          $str[$j] = $str[$i];
10884
      }
10885
    }
10886
10887 14
    $return = self::substr_in_byte($str, 0, $j);
10888 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10889
      $return = '';
10890
    }
10891
10892
    if (
10893 14
        $keepUtf8Chars === true
10894
        &&
10895 14
        self::strlen($return) >= self::strlen($str_backup)
10896
    ) {
10897 2
      return $str_backup;
10898
    }
10899
10900 14
    return $return;
10901
  }
10902
10903
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * The string is converted via the native (or polyfilled) utf8_encode(); if the result contains a "\xC2" byte
   * it is additionally passed through the "win1252_to_utf8" data table (keys are replaced by values).
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill may return false
    if ($encoded === false) {
      return '';
    }

    // without a "\xC2" byte the table lookup below is skipped
    $hasC2Byte = false !== \strpos($encoded, "\xC2");
    if (!$hasC2Byte) {
      return $encoded;
    }

    static $searchCache = null;
    static $replaceCache = null;

    if (null === $searchCache) {

      // NOTE(review): static analysis reports that getData() can also return false, while the
      // property is declared as null|array and array_keys() / array_values() only accept an array.
      // Confirm whether an additional type check is needed here.
      if (null === self::$WIN1252_TO_UTF8) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
10943
10944
  /**
   * Fixes UTF-8 / Windows-1252 characters by delegating to UTF8::fix_simple_utf8().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of UTF8::fix_simple_utf8() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // kept only for backward compatibility
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10957
10958
  /**
   * Returns the table of all known UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[]
   *                 An array with the type of whitespace as keys and the whitespace characters as values,
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10973
10974
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>The string that is appended when the input was shortened.</p>
   *
   * @return string <p>The unchanged input if it fits into the limit, otherwise the shortened string plus $strAddOn.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // nothing to keep: empty input or no word budget at all
    if ($limit < 1 || '' === $str) {
      return '';
    }

    // grab optional leading whitespace followed by at most $limit words
    $found = [];
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $found);

    $head = $found[0] ?? null;
    if ($head === null) {
      return $str;
    }

    // the match already spans the whole input, so nothing was cut off
    if (self::strlen($head) === self::strlen($str)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
11005
11006
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * A one-byte-per-character mask of the input is wrapped with the native wordwrap(),
   * afterwards the break positions of the mask are replayed on the real characters.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column; an empty string if $str or $break is empty.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $break || '' === $str) {
      return '';
    }

    $segments = \explode($break, $str);
    if ($segments === false) {
      $segments = [];
    }

    // $chars collects the real characters, $mask mirrors them with exactly one byte per entry:
    // "#" where $break stood in the input, " " for a space and "?" for anything else
    $chars = [];
    $mask = '';

    foreach ($segments as $index => $segment) {
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    // let the native function decide where to break, "#" marks every break position
    $mask = \wordwrap($mask, $width, '#', $cut);

    $result = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakAt = -1;

    while (false !== $breakAt = self::strpos($mask, '#', $breakAt + 1)) {
      // copy (and consume) the characters in front of the current break position
      ++$maskIndex;
      while ($maskIndex < $breakAt) {
        $result .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // drop the character at the break position if it is a space or the original break
      if (' ' === $chars[$charIndex] || $break === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $result .= $break;
    }

    // whatever was not consumed belongs to the last line
    return $result . \implode('', $chars);
  }
11076
11077
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * <p>
   * The input is split at "\r\n", "\r" and "\n"; each resulting line is wrapped on its own
   * via UTF8::wordwrap() and terminated with "\n".
   * </p>
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width that is passed to UTF8::wordwrap().</p>
   *
   * @return string <p>The wrapped lines, each one followed by "\n"; an empty string if splitting failed.</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // use the UTF-8 aware variant: the global wordwrap() measures the width in bytes,
      // so multi-byte characters would be counted more than once
      $wrapped .= self::wordwrap($line, $limit) . "\n";
    }

    return $wrapped;
  }
11101
11102
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return string[] <p>An array with the numeric code point as key and the White Space character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11111
11112
11113
}
11114