Passed
Push — master ( d83c95...a41c45 )
by Lars
02:57
created

UTF8::is_utf8()   D

Complexity

Conditions 30
Paths 104

Size

Total Lines 146
Code Lines 80

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 66
CRAP Score 30.5202

Importance

Changes 0
Metric Value
cc 30
eloc 80
nc 104
nop 2
dl 0
loc 146
ccs 66
cts 72
cp 0.9167
crap 30.5202
rs 4.1333
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Known byte order marks => their length in bytes.
   *
   * Every BOM is listed twice: once as the raw byte sequence and once as the
   * text that results when those bytes were mis-read as "WINDOWS-1252" and
   * stored as UTF-8 again (one char has [maybe] more than one byte then).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // "ï»¿" written as explicit bytes, so the key can not get lost by a re-encoding of this file
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters of the next key may originally have been NUL bytes - verify
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): the two trailing characters of the next key may originally have been NUL bytes - verify
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Creating an instance triggers the environment detection
   * (UTF8::checkForSupport()).
   */
  public function __construct()
  {
    self::checkForSupport();
  }
233
234
  /**
   * Fetches the character at the given position: $str[1] like functionality,
   * delegated to UTF8::substr() with a length of 1.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string Single Multi-Byte character, or an empty string for empty
   *                input or a negative position.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up: empty input or a negative position
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Returns the string with a leading UTF-8 BOM.
   *
   * INFO: If UTF8::string_has_bom() already reports a BOM, the input string
   *       is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The output string that contains BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The target length is always "$left + $right + current length"; the pad
   * direction passed to UTF8::str_pad() is STR_PAD_LEFT when only $left is
   * set, STR_PAD_RIGHT when only $right is set and STR_PAD_BOTH otherwise.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding has a default now: a required parameter after optional ones
    // is deprecated since PHP 8.0 and made the other defaults unusable.
    $length = $left + $right + self::strlen($str, $encoding);

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Changes the case of all keys in an array via UTF8::strtolower() or
   * UTF8::strtoupper().
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); every other
   *                     value is handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array An array with its keys lower or uppercased, the values are untouched.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // only CASE_UPPER switches to upper-casing, anything else means lower-casing
    $toUpper = ($case === CASE_UPPER);

    $return = [];
    foreach ($array as $key => $value) {
      $newKey = $toUpper ? self::strtoupper($key) : self::strtolower($key);

      $return[$newKey] = $value;
    }

    return $return;
  }
338
339
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * An empty string is also returned when $end directly follows $start.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // first position behind the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $result = self::substr($str, $from, $endPos - $from, $encoding);

    return $result === false ? '' : $result;
  }
377
378
  /**
   * Converts a string of binary digits into the bytes it represents.
   *
   * The digits are converted to hexadecimal with base_convert() and then
   * packed via pack('H*').
   *
   * @param mixed $bin 1|0
   *
   * @return string The packed bytes, or an empty string when $bin has no
   *                first offset or converts to "0".
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $hex = \base_convert($bin, 2, 16);

    return $hex === '0' ? '' : \pack('H*', $hex);
  }
398
399
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
410
411
  /**
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>Handed over to UTF8::chr_map() unchanged.</p>
   * @param string       $str      <p>Handed over to UTF8::chr_map() unchanged.</p>
   *
   * @return string[] Whatever UTF8::chr_map() returns.
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Returns the character at $index by asking UTF8::substr() for a
   * substring of length 1.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index (the substr() result casted to string).
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
   * Splits the string into its characters via UTF8::str_split() with a
   * chunk length of 1.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    $chars = self::str_split($str, 1);

    return $chars;
  }
451
452
  /**
   * Detects which of the optional extensions / features are usable on this
   * server and stores the result in UTF8::$SUPPORT.
   *
   * The detection runs only once: the "already_checked_via_portable_utf8"
   * flag is set on the first call and every later call returns immediately.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // default: no transliterator ids, replaced below if "intl" can list them
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized in a function-static cache, keyed by
   * "code point + encoding". Code points up to 127 are read from the "chr"
   * data table; bigger ones are created by \IntlChar::chr() if available,
   * otherwise the UTF-8 bytes are assembled from the same table. For every
   * target encoding other than UTF-8 the character is passed through
   * UTF8::encode() before it is returned.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // every other encoding than these three triggers a warning if "mbstring" is missing
    $needsMbstring = !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      // lazy-load the lookup table
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      $char = self::$CHR[$code_point];

      if (!$isUtf8) {
        $char = self::encode($encoding, $char);
      }

      return $CHAR_CACHE[$cacheKey] = $char;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $char = \IntlChar::chr($code_point);

      if (!$isUtf8) {
        $char = self::encode($encoding, $char);
      }

      return $CHAR_CACHE[$cacheKey] = $char;
    }

    // fallback: build the UTF-8 byte sequence by hand, byte by byte from the lookup table
    if (self::$CHR === null) {
      $table = self::getData('chr');
      if ($table) {
        self::$CHR = (array)$table;
      }
    }

    $code_point = (int)$code_point;
    if ($code_point <= 0x7F) {
      // 1 byte
      $char = self::$CHR[$code_point];
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: lead byte + 1 continuation byte
      $char = self::$CHR[($code_point >> 6) + 0xC0] .
              self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: lead byte + 2 continuation bytes
      $char = self::$CHR[($code_point >> 12) + 0xE0] .
              self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
              self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      // 4 bytes: lead byte + 3 continuation bytes
      $char = self::$CHR[($code_point >> 18) + 0xF0] .
              self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
              self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
              self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if (!$isUtf8) {
      $char = self::encode($encoding, $char);
    }

    return $CHAR_CACHE[$cacheKey] = $char;
  }
601
602
  /**
   * Runs the callback on every element that UTF8::split() produces for the
   * string and collects the results.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * INFO: With "mbstring.func_overload" active the length is taken from
   *       UTF8::strlen_in_byte() instead of the native strlen().
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \array_map('\strlen', $chars);
    }

    return \array_map(
        function ($char) {
          return UTF8::strlen_in_byte($char);
        },
        $chars
    );
  }
652
653
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes belong to the character; the
   * payload bits of the lead byte and of the following bytes are then
   * combined into one integer. Only the first character of $char is used.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int The decimal code, 0 for an empty string.
   */
  public static function chr_to_decimal(string $char): int
  {
    // Reading $char[0] from an empty string raises an "Uninitialized string
    // offset" notice / warning, so empty input is answered directly.
    // NOTE(review): 0 assumes UTF8::ord('') yields 0 as before - confirm against UTF8::ord().
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // append the payload bits of each following byte
    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
691
692
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The value of UTF8::ord() is formatted by UTF8::int_to_hex(). The literal
   * input "&#0;" is handed to UTF8::ord() as an empty string.
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx, or an empty string for empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    if ('&#0;' === $char) {
      $char = '';
    }

    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>Handed over to UTF8::chr_to_decimal() unchanged.</p>
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
   *
   * The chunks come from UTF8::split() and are joined with $end, so $end is
   * placed between the chunks only.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Order of the steps: strip invalid bytes, then (each only if requested)
   * remove the diamond question mark, remove invisible characters, normalize
   * whitespace, normalize MS Word chars and finally remove the BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 holds well-formed sequences and is kept via '$1',
    // the stray bytes of the other two groups are dropped
    $regx = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = (string)\preg_replace($regx, '$1', $str);

    if ($replace_diamond_question_mark) {
      $str = self::replace_diamond_question_mark($str, '');
    }

    if ($remove_invisible_characters) {
      $str = self::remove_invisible_characters($str);
    }

    if ($normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
809
810
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * The input is passed through UTF8::fix_simple_utf8() and then through
   * UTF8::clean() with a fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (no MS Word normalization)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
846
847
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is split via UTF8::split() first; every element is then mapped
   * through UTF8::ord() and - for $u_style - through UTF8::int_to_hex().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    if (\is_string($arg) === true) {
      $arg = self::split($arg);
    }

    $codePoints = \array_map([self::class, 'ord'], $arg);

    if (\count($codePoints) === 0) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
891
892
  /**
   * Replaces every run of "[[:space:]]" characters with a single space
   * (via UTF8::regex_replace()) and trims the result with UTF8::trim().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
907
908
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into single characters by UTF8::split() and the
   * occurrences are counted with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
921
922
  /**
   * Remove css media-queries.
   *
   * Every "@media ... { ... }" block that matches the pattern is replaced
   * with an empty string; a failed preg_replace() results in an empty string
   * because of the string cast.
   *
   * @param string $str
   *
   * @return string
   */
  public static function css_stripe_media_queries(string $str): string
  {
    $pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    return (string)\preg_replace($pattern, '', $str);
  }
937
938
  /**
   * Checks whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts an int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is
   * then decoded by UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
962
   * Decodes a MIME header field
963
   *
964
   * @param string $str
965
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
966
   *
967
   * @return string|false
968
   *                      A decoded MIME field on success,
969
   *                      or false if an error occurs during the decoding.
970
   */
971
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    // 'UTF-8' and 'CP850' are used as-is; everything else is normalized first
    // (falling back to 'UTF-8').
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: iconv decodes directly into the requested charset and
    // is told to continue when it hits a malformed part of the header.
    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    // Fallback: mb_decode_mimeheader() takes no target charset, so the
    // conversion to $encoding must be applied to the *decoded* text.
    // Converting the still MIME-encoded header beforehand (as was done
    // previously) never reaches the decoded characters.
    $decoded = \mb_decode_mimeheader($str);

    if ($encoding !== 'UTF-8') {
      $decoded = self::encode($encoding, $decoded);
    }

    return $decoded;
  }
991
992
  /**
993
   * Encode a string with a new charset-encoding.
994
   *
995
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
996
   *        so you can also call this function on a UTF-8 string without messing up the string.
997
   *
998
   * @param string $toEncoding                  <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
999
   * @param string $str                         <p>The input string</p>
1000
   * @param bool   $autodetectFromEncoding      [optional] <p>Force the new encoding (we try to fix broken / double
1001
   *                                            encoding for UTF-8)<br> otherwise we auto-detect the current
1002
   *                                            string-encoding</p>
1003
   * @param string $fromEncoding                [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1004
   *                                            A empty string will trigger the autodetect anyway.</p>
1005
   *
1006
   * @return string
1007
   */
1008 30
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    // Nothing to convert, or no target charset given: hand the input back untouched.
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      // null is passed as the fallback here, so the result is presumably not
      // always a string. The cast keeps $fromEncoding a string, which the
      // recursive call further down requires under strict_types; an empty
      // string behaves exactly like null in every check below.
      $fromEncoding = (string)self::normalize_encoding($fromEncoding, null);
    }

    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($fromEncoding === 'BASE64') {
      $str = \base64_decode($str);
      // The charset of the decoded payload is unknown. Use '' (not null) as
      // the "unknown" marker so the value can still be passed to the
      // string-typed $fromEncoding parameter of the recursive call below.
      $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
      return \base64_encode($str);
    }

    if ($toEncoding === 'HTML-ENTITIES') {

      if ($fromEncoding === 'HTML-ENTITIES') {
        $fromEncoding = 'UTF-8';
      }

      // Entities are always produced from UTF-8 input.
      if ($fromEncoding !== 'UTF-8') {
        $str = self::encode('UTF-8', $str, false, $fromEncoding);
      }

      return self::html_encode($str, true, 'UTF-8');
    }

    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_entity_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = 'UTF-8';
    }

    // Detect the source charset when asked to, or when none is known.
    $fromEncodingDetected = false;
    if ($autodetectFromEncoding === true || !$fromEncoding) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    if ($fromEncodingDetected !== false) {
      // A detected charset always wins over the one passed in.
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    // Dedicated converters for the most common charset pairs.
    if (
        $toEncoding === 'UTF-8'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1')
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8')
    ) {
      return self::to_iso8859($str);
    }

    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      //
      // Always convert *from* the resolved source charset. At this point
      // $fromEncoding differs from $toEncoding, so passing $toEncoding as the
      // source (as the autodetect mode did before) would ask mbstring to
      // convert a charset to itself.
      $strEncoded = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

      if ($strEncoded) {
        return $strEncoded;
      }
    }

    // Last resort: iconv, silently dropping characters the target cannot represent.
    $return = \iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
    if ($return !== false) {
      return $return;
    }

    return $str;
  }
1144
1145
  /**
1146
   * @param string $str
1147
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
1148
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
1149
   * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
1150
   * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
1151
   * @param int    $indent           [optional] <p>Set the maximum line length (passed to iconv as "line-length").</p>
1152
   *
1153
   * @return string|false
1154
   *                      An encoded MIME field on success,
1155
   *                      or false if an error occurs during the encoding.
1156
   */
1157
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // 'UTF-8' and 'CP850' pass through untouched; any other charset name is normalized first.
    if (!($fromCharset === 'UTF-8' || $fromCharset === 'CP850')) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if (!($toCharset === 'UTF-8' || $toCharset === 'CP850')) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    // Preferences in the shape iconv_mime_encode() expects.
    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    // The field name is intentionally left empty; only the value is supplied.
    return \iconv_mime_encode('', $str, $preferences);
  }
1188
1189
  /**
1190
   * Creates an extract of a text; if the search string is found, the extract tries to center it in the output.
1191
   *
1192
   * @param string   $str                    <p>The input string.</p>
1193
   * @param string   $search                 <p>The searched string.</p>
1194
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1195
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1196
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1197
   *
1198
   * @return string
1199
   */
1200 1
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters stripped from the cut edges of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // The length helper may report false; the cast guarantees that every
    // offset / length derived from it below is an int.
    $strLength = (int)self::strlen($str, $encoding);

    if ($length === null) {
      $length = (int)\round($strLength / 2, 0);
    }

    // Strict comparison on purpose: empty('0') is true, which would treat the
    // search term "0" as if no search term had been given.
    if ('' === $search) {

      $end = $length > 0 ? \min($length - 1, $strLength) : 0;

      // NOTE: if one of the two lookups yields false, min() selects it (false
      // sorts below every int) and the cast turns it into 0, i.e. "no cut position".
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    // A failed lookup (false) and a hit at offset 0 are both handled by the
    // "extract from the beginning" branch below, so both are mapped to 0.
    $wordPos = (int)self::stripos($str, $search, 0, $encoding);
    $searchLength = (int)self::strlen($search, $encoding);

    // Offset at which an extract of $length chars would be centered on the hit.
    $halfSide = (int)($wordPos - $length / 2 + $searchLength / 2);

    // Move the start back to the last space / dot in front of that offset.
    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordPos !== 0 && $halfSide > 0) {
      // Extract from the middle of the text.
      $l = \min($pos_start + $length - 1, $strLength);

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // No usable end position: take everything from $pos_start onwards.
        // The length is measured with the same $encoding as the substring call.
        $strSub = self::substr($str, $pos_start, $strLength, $encoding);

        return $strSub !== false
            ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
            : '';
      }

      $strSub = self::substr($str, $pos_start, $pos_end, $encoding);

      return $strSub !== false
          ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
          : '';
    }

    // Extract from the beginning of the text.
    $l = \min($length - 1, $strLength);

    $pos_end = \min(
        self::strpos($str, ' ', $l, $encoding),
        self::strpos($str, '.', $l, $encoding)
    );

    if ($pos_end) {
      $strSub = self::substr($str, 0, $pos_end, $encoding);

      return $strSub !== false
          ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
          : '';
    }

    return $str;
  }
1310
1311
  /**
1312
   * Reads entire file into a string.
1313
   *
1314
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1315
   *
1316
   * @link http://php.net/manual/en/function.file-get-contents.php
1317
   *
1318
   * @param string        $filename             <p>
1319
   *                                            Name of the file to read.
1320
   *                                            </p>
1321
   * @param bool          $use_include_path     [optional] <p>
1322
   *                                            Prior to PHP 5, this parameter is called
1323
   *                                            use_include_path and is a bool.
1324
   *                                            As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1325
   *                                            to trigger include path
1326
   *                                            search.
1327
   *                                            </p>
1328
   * @param resource|null $context              [optional] <p>
1329
   *                                            A valid context resource created with
1330
   *                                            stream_context_create. If you don't need to use a
1331
   *                                            custom context, you can skip this parameter by &null;.
1332
   *                                            </p>
1333
   * @param int|null      $offset               [optional] <p>
1334
   *                                            The offset where the reading starts.
1335
   *                                            </p>
1336
   * @param int|null      $maxLength            [optional] <p>
1337
   *                                            Maximum length of data read. The default is to read until end
1338
   *                                            of file is reached.
1339
   *                                            </p>
1340
   * @param int           $timeout              <p>The time in seconds for the timeout.</p>
1341
   *
1342
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
1343
   *                                            files, because they used non default utf-8 chars. Binary files like
1344
   *                                            images or pdf will not be converted.</p>
1345
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1346
   *                                            A empty string will trigger the autodetect anyway.</p>
1347
   *
1348
   * @return string|false The function returns the read data or false on failure.
1349
   */
1350 11
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failed filter as false; bail out instead of
    // handing a non-string to the native file_get_contents() below.
    if ($filename === false) {
      return false;
    }

    // Only build a context for the timeout when the caller did not bring one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only forwarded when it was actually provided.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false, $fromEncoding);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1406
1407
  /**
1408
   * Checks if a file starts with BOM (Byte Order Mark) character.
1409
   *
1410
   * @param string $file_path <p>Path to a valid file.</p>
1411
   *
1412
   * @throws \RuntimeException if file_get_contents() returned false
1413
   *
1414
   * @return bool
1415
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
1416
   */
1417 2
  public static function file_has_bom(string $file_path): bool
  {
    $rawContent = \file_get_contents($file_path);

    // Happy path first: the file was read, so inspect its content.
    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    // Reading failed: surface that to the caller as an exception.
    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1426
1427
  /**
1428
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1429
   *
1430
   * @param mixed  $var
1431
   * @param int    $normalization_form
1432
   * @param string $leading_combining
1433
   *
1434
   * @return mixed
1435
   */
1436 43
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively, keeping the keys.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively, in place.
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string value passes through untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Anything is_ascii() does not explicitly reject is returned as-is.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string is already in the requested form.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // The normalizer produced nothing usable, so re-encode the input as UTF-8 instead.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        \preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1490
1491
  /**
1492
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1493
   *
1494
   * Gets a specific external variable by name and optionally filters it
1495
   *
1496
   * @link  http://php.net/manual/en/function.filter-input.php
1497
   *
1498
   * @param int    $type          <p>
1499
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1500
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1501
   *                              <b>INPUT_ENV</b>.
1502
   *                              </p>
1503
   * @param string $variable_name <p>
1504
   *                              Name of a variable to get.
1505
   *                              </p>
1506
   * @param int    $filter        [optional] <p>
1507
   *                              The ID of the filter to apply. The
1508
   *                              manual page lists the available filters.
1509
   *                              </p>
1510
   * @param mixed  $options       [optional] <p>
1511
   *                              Associative array of options or bitwise disjunction of flags. If filter
1512
   *                              accepts options, flags can be provided in "flags" field of array.
1513
   *                              </p>
1514
   *
1515
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1516
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1517
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1518
   */
1519
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a 4th argument.
    $optionsGiven = \func_num_args() >= 4;

    $raw = $optionsGiven
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($raw);
  }
1529
1530
  /**
1531
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1532
   *
1533
   * Gets external variables and optionally filters them
1534
   *
1535
   * @link  http://php.net/manual/en/function.filter-input-array.php
1536
   *
1537
   * @param int   $type       <p>
1538
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1539
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1540
   *                          <b>INPUT_ENV</b>.
1541
   *                          </p>
1542
   * @param mixed $definition [optional] <p>
1543
   *                          An array defining the arguments. A valid key is a string
1544
   *                          containing a variable name and a valid value is either a filter type, or an array
1545
   *                          optionally specifying the filter, flags and options. If the value is an
1546
   *                          array, valid keys are filter which specifies the
1547
   *                          filter type,
1548
   *                          flags which specifies any flags that apply to the
1549
   *                          filter, and options which specifies any options that
1550
   *                          apply to the filter. See the example below for a better understanding.
1551
   *                          </p>
1552
   *                          <p>
1553
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1554
   *                          input array are filtered by this filter.
1555
   *                          </p>
1556
   * @param bool  $add_empty  [optional] <p>
1557
   *                          Add missing keys as <b>NULL</b> to the return value.
1558
   *                          </p>
1559
   *
1560
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1561
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1562
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1563
   *               set and <b>NULL</b> if the filter fails.
1564
   */
1565
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type supplied, the native function runs with its own defaults.
    $definitionGiven = \func_num_args() >= 2;

    $raw = $definitionGiven
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($raw);
  }
1575
1576
  /**
1577
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1578
   *
1579
   * Filters a variable with a specified filter
1580
   *
1581
   * @link  http://php.net/manual/en/function.filter-var.php
1582
   *
1583
   * @param mixed $variable <p>
1584
   *                        Value to filter.
1585
   *                        </p>
1586
   * @param int   $filter   [optional] <p>
1587
   *                        The ID of the filter to apply. The
1588
   *                        manual page lists the available filters.
1589
   *                        </p>
1590
   * @param mixed $options  [optional] <p>
1591
   *                        Associative array of options or bitwise disjunction of flags. If filter
1592
   *                        accepts options, flags can be provided in "flags" field of array. For
1593
   *                        the "callback" filter, callable type should be passed. The
1594
   *                        callback must accept one argument, the value to be filtered, and return
1595
   *                        the value after filtering/sanitizing it.
1596
   *                        </p>
1597
   *                        <p>
1598
   *                        <code>
1599
   *                        // for filters that accept options, use this format
1600
   *                        $options = array(
1601
   *                        'options' => array(
1602
   *                        'default' => 3, // value to return if the filter fails
1603
   *                        // other options here
1604
   *                        'min_range' => 0
1605
   *                        ),
1606
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1607
   *                        );
1608
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1609
   *                        // for filter that only accept flags, you can pass them directly
1610
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1611
   *                        // for filter that only accept flags, you can also pass as an array
1612
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1613
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1614
   *                        // callback validate filter
1615
   *                        function foo($value)
1616
   *                        {
1617
   *                        // Expected format: Surname, GivenNames
1618
   *                        if (strpos($value, ", ") === false) return false;
1619
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1620
   *                        $empty = (empty($surname) || empty($givennames));
1621
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1622
   *                        if ($empty || $notstrings) {
1623
   *                        return false;
1624
   *                        } else {
1625
   *                        return $value;
1626
   *                        }
1627
   *                        }
1628
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1629
   *                        </code>
1630
   *                        </p>
1631
   *
1632
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1633
   */
1634 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a 3rd argument.
    $optionsGiven = \func_num_args() >= 3;

    $filtered = $optionsGiven
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1644
1645
  /**
1646
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1647
   *
1648
   * Gets multiple variables and optionally filters them
1649
   *
1650
   * @link  http://php.net/manual/en/function.filter-var-array.php
1651
   *
1652
   * @param array $data       <p>
1653
   *                          An array with string keys containing the data to filter.
1654
   *                          </p>
1655
   * @param mixed $definition [optional] <p>
1656
   *                          An array defining the arguments. A valid key is a string
1657
   *                          containing a variable name and a valid value is either a
1658
   *                          filter type, or an
1659
   *                          array optionally specifying the filter, flags and options.
1660
   *                          If the value is an array, valid keys are filter
1661
   *                          which specifies the filter type,
1662
   *                          flags which specifies any flags that apply to the
1663
   *                          filter, and options which specifies any options that
1664
   *                          apply to the filter. See the example below for a better understanding.
1665
   *                          </p>
1666
   *                          <p>
1667
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1668
   *                          input array are filtered by this filter.
1669
   *                          </p>
1670
   * @param bool  $add_empty  [optional] <p>
1671
   *                          Add missing keys as <b>NULL</b> to the return value.
1672
   *                          </p>
1673
   *
1674
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1675
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1676
   */
1677 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data supplied, the native function runs with its own defaults.
    $definitionGiven = \func_num_args() >= 2;

    $filtered = $definitionGiven
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    return self::filter($filtered);
  }
1687
1688
  /**
1689
   * Checks whether finfo is available on the server.
1690
   *
1691
   * @return bool
1692
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1693
   */
1694
  public static function finfo_loaded(): bool
  {
    // Availability is decided solely by the presence of the "finfo" class.
    $finfoClassExists = \class_exists('finfo');

    return $finfoClassExists;
  }
1698
1699
  /**
1700
   * Returns the first $n characters of the string.
1701
   *
1702
   * @param string $str      <p>The input string.</p>
1703
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1704
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1705
   *
1706
   * @return string
1707
   */
1708 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Only a positive character count can yield a non-empty prefix.
    if ($n > 0) {
      $prefix = self::substr($str, 0, $n, $encoding);

      // self::substr() may signal failure with false; that case maps to ''.
      if ($prefix !== false) {
        return $prefix;
      }
    }

    return '';
  }
1721
1722
  /**
1723
   * Check if the number of unicode characters is not more than the specified integer.
1724
   *
1725
   * @param string $str      The original string to be checked.
1726
   * @param int    $box_size The size in number of chars to be checked against string.
1727
   *
1728
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1729
   */
1730 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length is measured by self::strlen(), not by the raw byte count.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1734
1735
  /**
1736
   * @param string $str
1737
   * @param bool   $useLower     <p>Convert to lowercase if true; otherwise (default) convert to uppercase.</p>
1738
   * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
1739
   *
1740
   * @return string
1741
   */
1742 54
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    // Step 1: common case folding, driven by the class-level lookup lists.
    $upper = self::$COMMON_CASE_FOLD['upper'];
    $lower = self::$COMMON_CASE_FOLD['lower'];

    if ($useLower === true) {
      $str = (string)\str_replace($upper, $lower, $str);
    } else {
      $str = (string)\str_replace($lower, $upper, $str);
    }

    // Step 2 (optional): full case folding, using a lazily loaded data table.
    if ($fullCaseFold) {

      // loaded once per request and then reused
      static $FULL_CASE_FOLD = null;
      if ($FULL_CASE_FOLD === null) {
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
      }

      // getData() returns false when the data file is missing; in that case
      // (or when the table is malformed) skip this step instead of indexing a
      // non-array and handing null to str_replace().
      if (
          \is_array($FULL_CASE_FOLD)
          &&
          isset($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1])
      ) {
        if ($useLower === true) {
          $str = (string)\str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        } else {
          $str = (string)\str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
        }
      }
    }

    return $str;
  }
1777
1778
  /**
1779
   * Try to fix simple broken UTF-8 strings.
1780
   *
1781
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1782
   *
1783
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1784
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1785
   * See: http://en.wikipedia.org/wiki/Windows-1252
1786
   *
1787
   * @param string $str <p>The input string</p>
1788
   *
1789
   * @return string
1790
   */
1791 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists, built once and then reused for every call
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        $fixTable = self::getData('utf8_fix');

        // getData() returns false when the data file is missing. Never store
        // that in the (null|array) property and never pass it to array_keys():
        // without a table there is nothing to fix, so return the input as-is.
        if (\is_array($fixTable) === false) {
          return $str;
        }

        self::$BROKEN_UTF8_FIX = $fixTable;
      }

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values(self::$BROKEN_UTF8_FIX);
    }

    // replace every known broken byte sequence with its fixed counterpart
    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1812
1813
  /**
1814
   * Fix a double (or multiple) encoded UTF8 string.
1815
   *
1816
   * @param string[]|string $str You can use a string or an array of strings.
1817
   *
1818
   * @return string[]|string
1819
   *                          Will return the fixed input-"array" or
1820
   *                          the fixed input-"string".
1821
   */
1822 2
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively), keeping their keys.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // Decode and re-encode repeatedly until a pass no longer changes the string.
    $current = (string)$str;
    $previous = '';
    while ($previous !== $current) {
      $previous = $current;
      $decoded = self::utf8_decode($current, true);
      $current = self::to_utf8($decoded);
    }

    return $current;
  }
1843
1844
  /**
1845
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1846
   *
1847
   * @param string $char
1848
   *
1849
   * @return string 'RTL' or 'LTR'
1850
   */
1851 2
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class, grouped by the result they
      // map to; LTR is checked first, then RTL
      $directionCodes = [
          'LTR' => [0, 11, 12, 20],
          'RTL' => [1, 13, 14, 15, 21],
      ];

      foreach ($directionCodes as $direction => $codes) {
        if (\in_array($intlDirection, $codes, true)) {
          return $direction;
        }
      }

      // any other code falls through to the manual lookup below
    }

    $codePoint = static::chr_to_decimal($char);

    // everything outside of 0x5be - 0x10b7f is treated as LTR
    if (!(0x5be <= $codePoint && 0x10b7f >= $codePoint)) {
      return 'LTR';
    }

    if (0x85e >= $codePoint) {

      // RTL code points up to 0x85e: single values ...
      $rtlSingles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];

      // ... and inclusive [from, to] ranges
      $rtlRanges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $codePoint) {

      return 'RTL';

    } elseif (0xfb1d <= $codePoint) {

      // RTL code points from 0xfb1d upwards: single values ...
      $rtlSingles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];

      // ... and inclusive [from, to] ranges
      $rtlRanges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two tables (and not 0x200f): nothing is RTL
      return 'LTR';

    }

    // single values are compared strictly (same as "===")
    if (\in_array($codePoint, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1966
1967
  /**
1968
   * Get data from "/data/*.php".
1969
   *
1970
   * @param string $file
1971
   *
1972
   * @return mixed|false Will return false on error.
1973
   */
1974 13
  private static function getData(string $file)
  {
    // Data files live next to this class in "data/" and carry a ".php" suffix.
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    // The included file's return value is handed straight back to the caller.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1984
1985
  /**
1986
   * Check for php-support.
1987
   *
1988
   * @param string|null $key
1989
   *
1990
   * @return mixed
1991
   *               Return the full support-"array", if $key === null<br>
1992
   *               return bool-value, if $key is used and available<br>
1993
   *               otherwise return <strong>null</strong>.
1994
   */
1995 26
  public static function getSupportInfo(string $key = null)
  {
    // Populate the support table on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without a key the whole table is returned.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) keys yield null, everything else its stored value.
    return self::$SUPPORT[$key] ?? null;
  }
2011
2012
  /**
2013
   * @param string $str
2014
   *
2015
   * @return string[]
2016
   */
2017 40
  private static function get_file_type($str)
  {
    // Guess a file type from the first two bytes ("magic number") of $str.
    // Returns ['ext' => ..., 'type' => ...]: both '' when fewer than two bytes
    // are available, both '???' when the signature is not recognised.
    if ('' === $str) {
      return ['ext' => '', 'type' => ''];
    }

    $str_info = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($str_info) !== 2) {
      return ['ext' => '', 'type' => ''];
    }

    $str_info = \unpack("C2chars", $str_info);

    // Combine both bytes into one 16-bit value. Concatenating the decimal
    // representations (as done before) is ambiguous: bytes 7,173 and bytes
    // 71,73 ("GI") both produce "7173", so non-GIF data was reported as gif.
    $type_code = ($str_info['chars1'] << 8) | $str_info['chars2'];

    switch ($type_code) {
      case 0x2550: // "%P" (37, 80)
        $ext = 'pdf';
        $type = 'binary';
        break;
      case 0x4d5a: // "MZ" (77, 90)
        $ext = 'exe';
        $type = 'binary';
        break;
      case 0x4d54: // "MT" (77, 84)
        $ext = 'midi';
        $type = 'binary';
        break;
      case 0x504b: // "PK" (80, 75)
        $ext = 'zip';
        $type = 'binary';
        break;
      case 0x5261: // "Ra" (82, 97)
        $ext = 'rar';
        $type = 'binary';
        break;
      case 0xffd8: // (255, 216)
        $ext = 'jpg';
        $type = 'binary';
        break;
      case 0x4749: // "GI" (71, 73)
        $ext = 'gif';
        $type = 'binary';
        break;
      case 0x424d: // "BM" (66, 77)
        $ext = 'bmp';
        $type = 'binary';
        break;
      case 0x8950: // (137, 80)
        $ext = 'png';
        $type = 'binary';
        break;
      default:
        $ext = '???';
        $type = '???';
        break;
    }

    return ['ext' => $ext, 'type' => $type];
  }
2079
2080
  /**
2081
   * @param int    $length        <p>Length of the random string.</p>
2082
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2083
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2084
   *
2085
   * @return string
2086
   */
2087 1
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $result = '';
    $alphabetLength = self::strlen($possibleChars, $encoding);

    // Nothing to pick from: an empty alphabet always yields an empty string.
    if ($alphabetLength === 0) {
      return '';
    }

    // Append one randomly picked character per iteration.
    for ($i = 0; $i < $length; $i++) {
      try {
        $index = \random_int(0, $alphabetLength - 1);
      } catch (\Exception $e) {
        // fall back to mt_rand() when random_int() throws
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $alphabetLength - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
2113
2114
  /**
2115
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2116
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2117
   *
2118
   * @return string
2119
   */
2120 1
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // Entropy parts, joined in this exact order: random number, session id,
    // client address, server address and the caller-supplied extra value.
    $entropy = \implode(
        '',
        [
            \mt_rand(),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ]
    );

    // uniqid() uses the entropy as prefix (with "more entropy" enabled).
    $unique = \uniqid($entropy, true);

    // Optionally hash the identifier together with the entropy.
    return $md5 ? \md5($unique . $entropy) : $unique;
  }
2136
2137
  /**
2138
   * alias for "UTF8::string_has_bom()"
2139
   *
2140
   * @see        UTF8::string_has_bom()
2141
   *
2142
   * @param string $str
2143
   *
2144
   * @return bool
2145
   *
2146
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2147
   */
2148 2
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: the check itself lives in string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2152
2153
  /**
2154
   * Returns true if the string contains a lower case char, false otherwise.
2155
   *
2156
   * @param string $str <p>The input string.</p>
2157
   *
2158
   * @return bool Whether or not the string contains a lower case character.
2159
   */
2160 47
  public static function has_lowercase(string $str): bool
  {
    // Any prefix followed by one POSIX [:lower:] character.
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2164
2165
  /**
2166
   * Returns true if the string contains an upper case char, false otherwise.
2167
   *
2168
   * @param string $str <p>The input string.</p>
2169
   *
2170
   * @return bool Whether or not the string contains an upper case character.
2171
   */
2172 12
  public static function has_uppercase(string $str): bool
  {
    // Any prefix followed by one POSIX [:upper:] character.
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2176
2177
  /**
2178
   * Converts a hexadecimal-value into an UTF-8 character.
2179
   *
2180
   * @param string $hexdec <p>The hexadecimal value.</p>
2181
   *
2182
   * @return string|false One single UTF-8 character.
2183
   */
2184 4
  public static function hex_to_chr(string $hexdec)
  {
    // Turn the hexadecimal notation into a number first ...
    $codePoint = \hexdec($hexdec);

    // ... then let decimal_to_chr() build the character.
    return self::decimal_to_chr($codePoint);
  }
2188
2189
  /**
2190
   * Converts hexadecimal U+xxxx code point representation to integer.
2191
   *
2192
   * INFO: opposite to UTF8::int_to_hex()
2193
   *
2194
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2195
   *
2196
   * @return int|false The code point, or false on failure.
2197
   */
2198 2
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Accept an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal
    // digits (case-insensitive). The character class is limited to [a-f0-9]:
    // with [a-z0-9] a value such as "zzzz" matched and was silently converted
    // to 0 by intval() instead of being reported as a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2213
2214
  /**
2215
   * alias for "UTF8::html_entity_decode()"
2216
   *
2217
   * @see UTF8::html_entity_decode()
2218
   *
2219
   * @param string $str
2220
   * @param int    $flags
2221
   * @param string $encoding
2222
   *
2223
   * @return string
2224
   */
2225 2
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Plain alias: all arguments are passed through unchanged.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2229
2230
  /**
2231
   * Converts a UTF-8 string to a series of HTML numbered entities.
2232
   *
2233
   * INFO: opposite to UTF8::html_decode()
2234
   *
2235
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2236
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2237
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2238
   *
2239
   * @return string HTML numbered entities.
2240
   */
2241 13
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // encode everything from 0x00 on, or only non-ASCII when ASCII is kept
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A convmap consists of groups of exactly four integers
      // (start, end, offset, mask). The former fifth element was ignored by
      // PHP 7 and makes PHP 8 throw a ValueError, so it is dropped here.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2283
2284
  /**
2285
   * UTF-8 version of html_entity_decode()
2286
   *
2287
   * The reason we are not using html_entity_decode() by itself is because
2288
   * while it is not technically correct to leave out the semicolon
2289
   * at the end of an entity most browsers will still interpret the entity
2290
   * correctly. html_entity_decode() does not convert entities without
2291
   * semicolons, so we are left with our own little solution here. Bummer.
2292
   *
2293
   * Convert all HTML entities to their applicable characters
2294
   *
2295
   * INFO: opposite to UTF8::html_encode()
2296
   *
2297
   * @link http://php.net/manual/en/function.html-entity-decode.php
2298
   *
2299
   * @param string $str      <p>
2300
   *                         The input string.
2301
   *                         </p>
2302
   * @param int    $flags    [optional] <p>
2303
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2304
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2305
   *                         <table>
2306
   *                         Available <i>flags</i> constants
2307
   *                         <tr valign="top">
2308
   *                         <td>Constant Name</td>
2309
   *                         <td>Description</td>
2310
   *                         </tr>
2311
   *                         <tr valign="top">
2312
   *                         <td><b>ENT_COMPAT</b></td>
2313
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2314
   *                         </tr>
2315
   *                         <tr valign="top">
2316
   *                         <td><b>ENT_QUOTES</b></td>
2317
   *                         <td>Will convert both double and single quotes.</td>
2318
   *                         </tr>
2319
   *                         <tr valign="top">
2320
   *                         <td><b>ENT_NOQUOTES</b></td>
2321
   *                         <td>Will leave both double and single quotes unconverted.</td>
2322
   *                         </tr>
2323
   *                         <tr valign="top">
2324
   *                         <td><b>ENT_HTML401</b></td>
2325
   *                         <td>
2326
   *                         Handle code as HTML 4.01.
2327
   *                         </td>
2328
   *                         </tr>
2329
   *                         <tr valign="top">
2330
   *                         <td><b>ENT_XML1</b></td>
2331
   *                         <td>
2332
   *                         Handle code as XML 1.
2333
   *                         </td>
2334
   *                         </tr>
2335
   *                         <tr valign="top">
2336
   *                         <td><b>ENT_XHTML</b></td>
2337
   *                         <td>
2338
   *                         Handle code as XHTML.
2339
   *                         </td>
2340
   *                         </tr>
2341
   *                         <tr valign="top">
2342
   *                         <td><b>ENT_HTML5</b></td>
2343
   *                         <td>
2344
   *                         Handle code as HTML 5.
2345
   *                         </td>
2346
   *                         </tr>
2347
   *                         </table>
2348
   *                         </p>
2349
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2350
   *
2351
   * @return string The decoded string.
2352
   */
2353 40
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // nothing to decode without "&", or without both "&#" and ";"
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    // Make sure the support table is populated BEFORE it is read below; the
    // mbstring check used to run ahead of this initialisation.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // repeat until a pass no longer changes the string (handles nested encoding)
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        // A convmap consists of groups of exactly four integers
        // (start, end, offset, mask). The former fifth element was ignored by
        // PHP 7 and makes PHP 8 throw a ValueError, so it is dropped here.
        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // quotes stay encoded here; $flags decides about them below
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2441
2442
  /**
2443
   * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2444
   *
2445
   * @param string $str
2446
   * @param string $encoding [optional] <p>Default: UTF-8</p>
2447
   *
2448
   * @return string
2449
   */
2450 6
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    // Escape both quote styles and substitute invalid code unit sequences.
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2458
2459
  /**
2460
   * Remove empty html-tag.
2461
   *
2462
   * e.g.: <tag></tag>
2463
   *
2464
   * @param string $str
2465
   *
2466
   * @return string
2467
   */
2468 1
  public static function html_stripe_empty_tags(string $str): string
  {
    // An opening tag, optional whitespace only, then a closing tag.
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    // preg_replace() yields null on error; the cast turns that into ''.
    return (string)$stripped;
  }
2476
2477
  /**
2478
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2479
   *
2480
   * @link http://php.net/manual/en/function.htmlentities.php
2481
   *
2482
   * @param string $str           <p>
2483
   *                              The input string.
2484
   *                              </p>
2485
   * @param int    $flags         [optional] <p>
2486
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2487
   *                              invalid code unit sequences and the used document type. The default is
2488
   *                              ENT_COMPAT | ENT_HTML401.
2489
   *                              <table>
2490
   *                              Available <i>flags</i> constants
2491
   *                              <tr valign="top">
2492
   *                              <td>Constant Name</td>
2493
   *                              <td>Description</td>
2494
   *                              </tr>
2495
   *                              <tr valign="top">
2496
   *                              <td><b>ENT_COMPAT</b></td>
2497
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2498
   *                              </tr>
2499
   *                              <tr valign="top">
2500
   *                              <td><b>ENT_QUOTES</b></td>
2501
   *                              <td>Will convert both double and single quotes.</td>
2502
   *                              </tr>
2503
   *                              <tr valign="top">
2504
   *                              <td><b>ENT_NOQUOTES</b></td>
2505
   *                              <td>Will leave both double and single quotes unconverted.</td>
2506
   *                              </tr>
2507
   *                              <tr valign="top">
2508
   *                              <td><b>ENT_IGNORE</b></td>
2509
   *                              <td>
2510
   *                              Silently discard invalid code unit sequences instead of returning
2511
   *                              an empty string. Using this flag is discouraged as it
2512
   *                              may have security implications.
2513
   *                              </td>
2514
   *                              </tr>
2515
   *                              <tr valign="top">
2516
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2517
   *                              <td>
2518
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2519
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2520
   *                              </td>
2521
   *                              </tr>
2522
   *                              <tr valign="top">
2523
   *                              <td><b>ENT_DISALLOWED</b></td>
2524
   *                              <td>
2525
   *                              Replace invalid code points for the given document type with a
2526
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2527
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2528
   *                              instance, to ensure the well-formedness of XML documents with
2529
   *                              embedded external content.
2530
   *                              </td>
2531
   *                              </tr>
2532
   *                              <tr valign="top">
2533
   *                              <td><b>ENT_HTML401</b></td>
2534
   *                              <td>
2535
   *                              Handle code as HTML 4.01.
2536
   *                              </td>
2537
   *                              </tr>
2538
   *                              <tr valign="top">
2539
   *                              <td><b>ENT_XML1</b></td>
2540
   *                              <td>
2541
   *                              Handle code as XML 1.
2542
   *                              </td>
2543
   *                              </tr>
2544
   *                              <tr valign="top">
2545
   *                              <td><b>ENT_XHTML</b></td>
2546
   *                              <td>
2547
   *                              Handle code as XHTML.
2548
   *                              </td>
2549
   *                              </tr>
2550
   *                              <tr valign="top">
2551
   *                              <td><b>ENT_HTML5</b></td>
2552
   *                              <td>
2553
   *                              Handle code as HTML 5.
2554
   *                              </td>
2555
   *                              </tr>
2556
   *                              </table>
2557
   *                              </p>
2558
   * @param string $encoding      [optional] <p>
2559
   *                              Like <b>htmlspecialchars</b>,
2560
   *                              <b>htmlentities</b> takes an optional third argument
2561
   *                              <i>encoding</i> which defines encoding used in
2562
   *                              conversion.
2563
   *                              Although this argument is technically optional, you are highly
2564
   *                              encouraged to specify the correct value for your code.
2565
   *                              </p>
2566
   * @param bool   $double_encode [optional] <p>
2567
   *                              When <i>double_encode</i> is turned off PHP will not
2568
   *                              encode existing html entities. The default is to convert everything.
2569
   *                              </p>
2570
   *
2571
   *
2572
   * @return string The encoded string.
2573
   * </p>
2574
   * <p>
2575
   * If the input <i>string</i> contains an invalid code unit
2576
   * sequence within the given <i>encoding</i> an empty string
2577
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2578
   * <b>ENT_SUBSTITUTE</b> flags are set.
2579
   */
2580 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used verbatim; every other encoding name is normalized first
    // (falling back to "UTF-8").
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The native htmlentities() leaves backslashes untouched, so they are rewritten
    // to their numeric entity right after the native conversion.
    // Background: https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // Final pass through the class' own encoder.
    // NOTE(review): the meaning of the "true" flag is defined by html_encode(), which is not visible here.
    return self::html_encode($encoded, true, $encoding);
  }
2600
2601
  /**
2602
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2603
   *
2604
   * INFO: Take a look at "UTF8::htmlentities()"
2605
   *
2606
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2607
   *
2608
   * @param string $str           <p>
2609
   *                              The string being converted.
2610
   *                              </p>
2611
   * @param int    $flags         [optional] <p>
2612
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2613
   *                              invalid code unit sequences and the used document type. The default is
2614
   *                              ENT_COMPAT | ENT_HTML401.
2615
   *                              <table>
2616
   *                              Available <i>flags</i> constants
2617
   *                              <tr valign="top">
2618
   *                              <td>Constant Name</td>
2619
   *                              <td>Description</td>
2620
   *                              </tr>
2621
   *                              <tr valign="top">
2622
   *                              <td><b>ENT_COMPAT</b></td>
2623
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2624
   *                              </tr>
2625
   *                              <tr valign="top">
2626
   *                              <td><b>ENT_QUOTES</b></td>
2627
   *                              <td>Will convert both double and single quotes.</td>
2628
   *                              </tr>
2629
   *                              <tr valign="top">
2630
   *                              <td><b>ENT_NOQUOTES</b></td>
2631
   *                              <td>Will leave both double and single quotes unconverted.</td>
2632
   *                              </tr>
2633
   *                              <tr valign="top">
2634
   *                              <td><b>ENT_IGNORE</b></td>
2635
   *                              <td>
2636
   *                              Silently discard invalid code unit sequences instead of returning
2637
   *                              an empty string. Using this flag is discouraged as it
2638
   *                              may have security implications.
2639
   *                              </td>
2640
   *                              </tr>
2641
   *                              <tr valign="top">
2642
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2643
   *                              <td>
2644
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2645
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2646
   *                              </td>
2647
   *                              </tr>
2648
   *                              <tr valign="top">
2649
   *                              <td><b>ENT_DISALLOWED</b></td>
2650
   *                              <td>
2651
   *                              Replace invalid code points for the given document type with a
2652
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2653
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2654
   *                              instance, to ensure the well-formedness of XML documents with
2655
   *                              embedded external content.
2656
   *                              </td>
2657
   *                              </tr>
2658
   *                              <tr valign="top">
2659
   *                              <td><b>ENT_HTML401</b></td>
2660
   *                              <td>
2661
   *                              Handle code as HTML 4.01.
2662
   *                              </td>
2663
   *                              </tr>
2664
   *                              <tr valign="top">
2665
   *                              <td><b>ENT_XML1</b></td>
2666
   *                              <td>
2667
   *                              Handle code as XML 1.
2668
   *                              </td>
2669
   *                              </tr>
2670
   *                              <tr valign="top">
2671
   *                              <td><b>ENT_XHTML</b></td>
2672
   *                              <td>
2673
   *                              Handle code as XHTML.
2674
   *                              </td>
2675
   *                              </tr>
2676
   *                              <tr valign="top">
2677
   *                              <td><b>ENT_HTML5</b></td>
2678
   *                              <td>
2679
   *                              Handle code as HTML 5.
2680
   *                              </td>
2681
   *                              </tr>
2682
   *                              </table>
2683
   *                              </p>
2684
   * @param string $encoding      [optional] <p>
2685
   *                              Defines encoding used in conversion.
2686
   *                              </p>
2687
   *                              <p>
2688
   *                              For the purposes of this function, the encodings
2689
   *                              ISO-8859-1, ISO-8859-15,
2690
   *                              UTF-8, cp866,
2691
   *                              cp1251, cp1252, and
2692
   *                              KOI8-R are effectively equivalent, provided the
2693
   *                              <i>string</i> itself is valid for the encoding, as
2694
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2695
   *                              the same positions in all of these encodings.
2696
   *                              </p>
2697
   * @param bool   $double_encode [optional] <p>
2698
   *                              When <i>double_encode</i> is turned off PHP will not
2699
   *                              encode existing html entities, the default is to convert everything.
2700
   *                              </p>
2701
   *
2702
   * @return string The converted string.
2703
   * </p>
2704
   * <p>
2705
   * If the input <i>string</i> contains an invalid code unit
2706
   * sequence within the given <i>encoding</i> an empty string
2707
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2708
   * <b>ENT_SUBSTITUTE</b> flags are set.
2709
   */
2710 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are passed to PHP as they are; any other name is normalized
    // (with "UTF-8" as fallback) before the native function sees it.
    $charset = \in_array($encoding, ['UTF-8', 'CP850'], true)
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return \htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2718
2719
  /**
2720
   * Checks whether iconv is available on the server.
2721
   *
2722
   * @return bool
2723
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2724
   */
2725
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so it can be returned directly.
    return \extension_loaded('iconv');
  }
2729
2730
  /**
2731
   * alias for "UTF8::decimal_to_chr()"
2732
   *
2733
   * @see UTF8::decimal_to_chr()
2734
   *
2735
   * @param mixed $int
2736
   *
2737
   * @return string
2738
   */
2739 4
  public static function int_to_chr($int): string
2740
  {
2741 4
    // Pure pass-through: the argument is handed to decimal_to_chr() unchanged
    // and that method's result is returned as-is.
    return self::decimal_to_chr($int);
2742
  }
2743
2744
  /**
2745
   * Converts Integer to hexadecimal U+xxxx code point representation.
2746
   *
2747
   * INFO: opposite to UTF8::hex_to_int()
2748
   *
2749
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2750
   * @param string $pfix [optional]
2751
   *
2752
   * @return string The prefix followed by the lowercase hexadecimal code point, left-padded with zeros to at least four digits.
2753
   */
2754 6
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // dechex() produces lowercase digits without leading zeros;
    // pad on the left so that at least four digits are present.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2762
2763
  /**
2764
   * Checks whether intl-char is available on the server.
2765
   *
2766
   * @return bool
2767
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2768
   */
2769
  public static function intlChar_loaded(): bool
2770
  {
2771
    // Availability is detected by the presence of the IntlChar class
    // (class_exists() is called with its default arguments).
    return \class_exists('IntlChar');
2772
  }
2773
2774
  /**
2775
   * Checks whether intl is available on the server.
2776
   *
2777
   * @return bool
2778
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2779
   */
2780 5
  public static function intl_loaded(): bool
2781
  {
2782 5
    // Asks PHP directly whether the "intl" extension is loaded.
    return \extension_loaded('intl');
2783
  }
2784
2785
  /**
2786
   * alias for "UTF8::is_ascii()"
2787
   *
2788
   * @see        UTF8::is_ascii()
2789
   *
2790
   * @param string $str
2791
   *
2792
   * @return bool
2793
   *
2794
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2795
   */
2796 2
  public static function isAscii(string $str): bool
2797
  {
2798 2
    // Deprecated camelCase alias: forwards $str to is_ascii() and returns its result unchanged.
    return self::is_ascii($str);
2799
  }
2800
2801
  /**
2802
   * alias for "UTF8::is_base64()"
2803
   *
2804
   * @see        UTF8::is_base64()
2805
   *
2806
   * @param string $str
2807
   *
2808
   * @return bool
2809
   *
2810
   * @deprecated <p>use "UTF8::is_base64()"</p>
2811
   */
2812 2
  public static function isBase64($str): bool
2813
  {
2814 2
    // Deprecated camelCase alias: forwards $str to is_base64() and returns its result unchanged.
    // The parameter is intentionally untyped here, exactly like the one of is_base64().
    return self::is_base64($str);
2815
  }
2816
2817
  /**
2818
   * alias for "UTF8::is_binary()"
2819
   *
2820
   * @see        UTF8::is_binary()
2821
   *
2822
   * @param mixed $str
2823
   * @param bool  $strict
2824
   *
2825
   * @return bool
2826
   *
2827
   * @deprecated <p>use "UTF8::is_binary()"</p>
2828
   */
2829 4
  public static function isBinary($str, bool $strict = false): bool
  {
    // Deprecated camelCase alias of is_binary().
    //
    // $strict is declared as bool to mirror is_binary(): this file runs with strict_types=1,
    // so an untyped scalar such as 1 would otherwise be forwarded as-is and raise a TypeError
    // on the inner call. With the declaration, callers in coercive mode get the usual
    // scalar-to-bool conversion at this boundary instead.
    return self::is_binary($str, $strict);
  }
2833
2834
  /**
2835
   * alias for "UTF8::is_bom()"
2836
   *
2837
   * @see        UTF8::is_bom()
2838
   *
2839
   * @param string $utf8_chr
2840
   *
2841
   * @return bool
2842
   *
2843
   * @deprecated <p>use "UTF8::is_bom()"</p>
2844
   */
2845 2
  public static function isBom(string $utf8_chr): bool
2846
  {
2847 2
    // Deprecated camelCase alias: forwards $utf8_chr to is_bom() and returns its result unchanged.
    return self::is_bom($utf8_chr);
2848
  }
2849
2850
  /**
2851
   * alias for "UTF8::is_html()"
2852
   *
2853
   * @see        UTF8::is_html()
2854
   *
2855
   * @param string $str
2856
   *
2857
   * @return bool
2858
   *
2859
   * @deprecated <p>use "UTF8::is_html()"</p>
2860
   */
2861 2
  public static function isHtml(string $str): bool
2862
  {
2863 2
    // Deprecated camelCase alias: forwards $str to is_html() and returns its result unchanged.
    return self::is_html($str);
2864
  }
2865
2866
  /**
2867
   * alias for "UTF8::is_json()"
2868
   *
2869
   * @see        UTF8::is_json()
2870
   *
2871
   * @param string $str
2872
   *
2873
   * @return bool
2874
   *
2875
   * @deprecated <p>use "UTF8::is_json()"</p>
2876
   */
2877
  public static function isJson(string $str): bool
2878
  {
2879
    // Deprecated camelCase alias: forwards $str to is_json() and returns its result unchanged
    // (including any exception is_json() throws).
    return self::is_json($str);
2880
  }
2881
2882
  /**
2883
   * alias for "UTF8::is_utf16()"
2884
   *
2885
   * @see        UTF8::is_utf16()
2886
   *
2887
   * @param mixed $str
2888
   *
2889
   * @return int|false
2890
   *                    <strong>false</strong> if it's not UTF-16,<br>
2891
   *                    <strong>1</strong> for UTF-16LE,<br>
2892
   *                    <strong>2</strong> for UTF-16BE.
2893
   *
2894
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2895
   */
2896 2
  public static function isUtf16($str)
2897
  {
2898 2
    // Deprecated camelCase alias: forwards $str to is_utf16() and returns its int|false result unchanged.
    return self::is_utf16($str);
2899
  }
2900
2901
  /**
2902
   * alias for "UTF8::is_utf32()"
2903
   *
2904
   * @see        UTF8::is_utf32()
2905
   *
2906
   * @param mixed $str
2907
   *
2908
   * @return int|false
2909
   *                   <strong>false</strong> if it's not UTF-32,
2910
   *                   <strong>1</strong> for UTF-32LE,
2911
   *                   <strong>2</strong> for UTF-32BE.
2912
   *
2913
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2914
   */
2915 2
  public static function isUtf32($str)
2916
  {
2917 2
    // Deprecated camelCase alias: forwards $str to is_utf32() and returns its int|false result unchanged.
    return self::is_utf32($str);
2918
  }
2919
2920
  /**
2921
   * alias for "UTF8::is_utf8()"
2922
   *
2923
   * @see        UTF8::is_utf8()
2924
   *
2925
   * @param string $str
2926
   * @param bool   $strict
2927
   *
2928
   * @return bool
2929
   *
2930
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2931
   */
2932 17
  public static function isUtf8($str, bool $strict = false): bool
  {
    // Deprecated camelCase alias of is_utf8().
    //
    // $strict is declared as bool to mirror is_utf8(): this file runs with strict_types=1,
    // so an untyped scalar such as 1 would otherwise be forwarded as-is and raise a TypeError
    // on the inner call. $str stays untyped because is_utf8() accepts a string or an array of strings.
    return self::is_utf8($str, $strict);
  }
2936
2937
  /**
2938
   * Returns true if the string contains only alphabetic chars, false otherwise.
2939
   *
2940
   * @param string $str
2941
   *
2942
   * @return bool
2943
   *               Whether or not $str contains only alphabetic chars.
2944
   */
2945 10
  public static function is_alpha(string $str): bool
2946
  {
2947 10
    // Anchored POSIX [[:alpha:]] class; the "*" quantifier lets the pattern accept zero characters too.
    // NOTE(review): the pattern carries no delimiters or modifiers, so the matching engine and its
    // Unicode handling are decided inside str_matches_pattern() - confirm there.
    return self::str_matches_pattern($str, '^[[:alpha:]]*$');
2948
  }
2949
2950
  /**
2951
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2952
   *
2953
   * @param string $str
2954
   *
2955
   * @return bool
2956
   *               Whether or not $str contains only alphanumeric chars.
2957
   */
2958 13
  public static function is_alphanumeric(string $str): bool
2959
  {
2960 13
    // Anchored POSIX [[:alnum:]] class; the "*" quantifier lets the pattern accept zero characters too.
    // NOTE(review): the pattern carries no delimiters or modifiers, so the matching engine and its
    // Unicode handling are decided inside str_matches_pattern() - confirm there.
    return self::str_matches_pattern($str, '^[[:alnum:]]*$');
2961
  }
2962
2963
  /**
2964
   * Checks if a string is 7 bit ASCII.
2965
   *
2966
   * @param string $str <p>The string to check.</p>
2967
   *
2968
   * @return bool
2969
   *              <strong>true</strong> if it is ASCII<br>
2970
   *              <strong>false</strong> otherwise
2971
   *
2972
   */
2973 201
  public static function is_ascii(string $str): bool
  {
    // Nothing to inspect: the empty string is reported as ASCII.
    if ($str === '') {
      return true;
    }

    // Look for at least one byte that is not on the allow-list
    // (tab, LF, CR, the printable range 0x20-0x7E, plus 0x10 and 0x13).
    // NOTE(review): \x10 and \x13 are the DLE/DC3 control codes; possibly decimal 10/13 were meant,
    // which \x0A/\x0D already cover - confirm before changing, callers may rely on it.
    $foundForeignByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return $foundForeignByte !== 1;
  }
2981
2982
  /**
2983
   * Returns true if the string is base64 encoded, false otherwise.
2984
   *
2985
   * @param string $str <p>The input string.</p>
2986
   *
2987
   * @return bool Whether or not $str is base64 encoded.
2988
   */
2989 9
  public static function is_base64($str): bool
  {
    // Non-strings and the empty string are never reported as base64.
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false as soon as the input contains
    // characters from outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // The input only counts as base64 when re-encoding reproduces it exactly.
    // The decoded payload is deliberately NOT tested for truthiness: the payload "0"
    // (encoded as "MA==") is falsy in PHP and would wrongly be rejected.
    return \base64_encode($decoded) === $str;
  }
3003
3004
  /**
3005
   * Check if the input is binary (heuristic; this looks like a hack).
3006
   *
3007
   * @param mixed $input
3008
   * @param bool  $strict
3009
   *
3010
   * @return bool
3011
   */
3012 40
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // A string made up solely of the characters "0" and "1" is reported as binary right away.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Strict mode additionally asks ext-fileinfo for the detected encoding.
    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detector = new \finfo(FILEINFO_MIME_ENCODING);
      if ($detector->buffer($input) === 'binary') {
        return true;
      }
    }

    // Next signal: the file-type lookup classifies the data as "binary".
    $fileType = self::get_file_type($input);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteCount = self::strlen_in_byte($input);
    if (!$byteCount) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Last signal: the share of NUL bytes among all bytes exceeds the 0.256 threshold.
    $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteCount);

    return ($nullByteCount / $byteCount) > 0.256;
  }
3061
3062
  /**
3063
   * Check if the file is binary.
3064
   *
3065
   * @param string $file
3066
   *
3067
   * @return bool
3068
   */
3069 6
  public static function is_binary_file($file): bool
  {
    // A file that cannot be opened is reported as "not binary".
    $handle = \fopen($file, 'rb');
    if (\is_resource($handle) === false) {
      return false;
    }

    // Only the first 512 bytes are sampled.
    $head = \fread($handle, 512);
    \fclose($handle);

    if ($head === '') {
      return false;
    }

    // The sample is judged by is_binary() in strict mode.
    return self::is_binary($head, true);
  }
3086
3087
  /**
3088
   * Returns true if the string contains only whitespace chars, false otherwise.
3089
   *
3090
   * @param string $str
3091
   *
3092
   * @return bool
3093
   *               Whether or not $str contains only whitespace characters.
3094
   */
3095 15
  public static function is_blank(string $str): bool
3096
  {
3097 15
    // Anchored POSIX [[:space:]] class; the "*" quantifier lets the pattern accept zero characters too.
    // NOTE(review): the pattern carries no delimiters or modifiers, so the matching engine and its
    // Unicode handling are decided inside str_matches_pattern() - confirm there.
    return self::str_matches_pattern($str, '^[[:space:]]*$');
3098
  }
3099
3100
  /**
3101
   * Checks if the given string is equal to any "Byte Order Mark".
3102
   *
3103
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3104
   *
3105
   * @param string $str <p>The input string.</p>
3106
   *
3107
   * @return bool
3108
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
3109
   */
3110 2
  public static function is_bom($str): bool
  {
    // The BOM table is keyed by the BOM byte sequence itself (the values are byte lengths),
    // so a strict search over its keys answers the question without any type juggling.
    return \in_array($str, \array_keys(self::$BOM), true);
  }
3120
3121
  /**
3122
   * Determine whether the string is considered to be empty.
3123
   *
3124
   * A variable is considered empty if it does not exist or if its value equals FALSE.
3125
   * empty() does not generate a warning if the variable does not exist.
3126
   *
3127
   * @param mixed $str
3128
   *
3129
   * @return bool Whether or not $str is empty().
3130
   */
3131
  public static function is_empty($str): bool
  {
    // For a variable that is guaranteed to exist (a parameter), empty() boils down
    // to "the value converts to boolean false".
    return (bool)$str === false;
  }
3135
3136
  /**
3137
   * Returns true if the string contains only hexadecimal chars, false otherwise.
3138
   *
3139
   * @param string $str
3140
   *
3141
   * @return bool
3142
   *               Whether or not $str contains only hexadecimal chars.
3143
   */
3144 13
  public static function is_hexadecimal(string $str): bool
3145
  {
3146 13
    // Anchored POSIX [[:xdigit:]] class; the "*" quantifier lets the pattern accept zero characters too.
    // NOTE(review): the pattern carries no delimiters or modifiers, so the matching engine
    // is decided inside str_matches_pattern() - confirm there.
    return self::str_matches_pattern($str, '^[[:xdigit:]]*$');
3147
  }
3148
3149
  /**
3150
   * Check if the string contains any html-tags <lall>.
3151
   *
3152
   * @param string $str <p>The input string.</p>
3153
   *
3154
   * @return bool
3155
   */
3156 3
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // One opening, closing or self-closing tag (optionally carrying attributes) is enough.
    $tagFound = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $tagFound === 1;
  }
3169
3170
  /**
3171
   * Try to check if "$str" is a JSON string.
3172
   *
3173
   * @param string $str <p>The input string.</p>
3174
   *
3175
   * @return bool
3176
   */
3177 22
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    // Only documents that decode to an object or an array qualify; scalars are rejected.
    $decoded = self::json_decode($str);
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3202
3203
  /**
3204
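   * Returns true if the string contains only lower case chars, false otherwise.
   *
   * @param string $str <p>The input string.</p>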
3205
   *
3206
   * @return bool
3207
   */
3208 8
  public static function is_lowercase(string $str): bool
  {
    // Anchored POSIX [[:lower:]] class; the result of the pattern helper is returned directly,
    // in the same way the sibling character-class checks of this class do it.
    return self::str_matches_pattern($str, '^[[:lower:]]*$');
  }
3216
3217
  /**
3218
   * Returns true if the string is serialized, false otherwise.
3219
   *
3220
   * @param string $str
3221
   *
3222
   * @return bool Whether or not $str is serialized.
3223
   */
3224 7
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized boolean false,
    // so that single payload has to be recognised literally.
    if ($str === 'b:0;') {
      return true;
    }

    // SECURITY: this check may be fed strings of unknown origin. "allowed_classes => false" makes
    // PHP decode any object as __PHP_Incomplete_Class instead of instantiating the named class,
    // so no class code is run for objects inside the payload. The outcome of the check does not
    // change: an object payload still decodes to something other than false.
    // The silence operator stays because unserialize() reports malformed input via a notice/warning.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3236
3237
  /**
3238
   * Returns true if the string contains only upper case chars, false
3239
   * otherwise.
3240
   *
3241
   * @param string $str <p>The input string.</p>
3242
   *
3243
   * @return bool
3244
   *               Whether or not $str contains only upper case characters.
3245
   */
3246 8
  public static function is_uppercase(string $str): bool
3247
  {
3248 8
    // Anchored POSIX [[:upper:]] class; the "*" quantifier lets the pattern accept zero characters too.
    // NOTE(review): the pattern carries no delimiters or modifiers, so the matching engine and its
    // Unicode handling are decided inside str_matches_pattern() - confirm there.
    return self::str_matches_pattern($str, '^[[:upper:]]*$');
3249
  }
3250
3251
  /**
3252
   * Check if the string is UTF-16.
3253
   *
3254
   * @param mixed $str <p>The input string.</p>
3255
   *
3256
   * @return int|false
3257
   *                   <strong>false</strong> if it's not UTF-16,<br>
3258
   *                   <strong>1</strong> for UTF-16LE,<br>
3259
   *                   <strong>2</strong> for UTF-16BE.
3260
   */
3261 21
  public static function is_utf16($str)
  {
    $str = (string)$str;

    // Only input that is_binary() classifies as binary is examined any further.
    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before any checkForSupport() call (its "0"/"1"-only shortcut
    // comes first), so make sure the support table exists before it is read - same lazy guard
    // the other methods of this class use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // count_chars() result for the BOM-free input; filled lazily on the first
    // successful round trip and shared by both byte orders.
    $strChars = [];

    // Scores one candidate byte order:
    //  1. interpret the input in that encoding and convert it to UTF-8,
    //  2. convert the result back and forth once more; the candidate only scores
    //     when this round trip reproduces the UTF-8 text exactly,
    //  3. add one point for every key of count_chars(<round-tripped text>) that a strict
    //     in_array() finds among the entries of count_chars(<input>).
    $scoreByteOrder = static function (string $encoding) use ($str, &$strChars): int {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return $score;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        return $score;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score++;
        }
      }

      return $score;
    };

    $maybeUTF16LE = $scoreByteOrder('UTF-16LE');
    $maybeUTF16BE = $scoreByteOrder('UTF-16BE');

    // A tie (including 0:0) means the byte order cannot be decided.
    if ($maybeUTF16BE === $maybeUTF16LE) {
      return false;
    }

    // 1 = little endian wins, 2 = big endian wins.
    return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
  }
3323
3324
  /**
3325
   * Check if the string is UTF-32.
3326
   *
3327
   * @param mixed $str
3328
   *
3329
   * @return int|false
3330
   *                   <strong>false</strong> if it's not UTF-32,<br>
3331
   *                   <strong>1</strong> for UTF-32LE,<br>
3332
   *                   <strong>2</strong> for UTF-32BE.
3333
   */
3334 17
  public static function is_utf32($str)
  {
    $str = (string)$str;

    // Only input that is_binary() classifies as binary is examined any further.
    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before any checkForSupport() call (its "0"/"1"-only shortcut
    // comes first), so make sure the support table exists before it is read - same lazy guard
    // the other methods of this class use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // count_chars() result for the BOM-free input; filled lazily on the first
    // successful round trip and shared by both byte orders.
    $strChars = [];

    // Scores one candidate byte order:
    //  1. interpret the input in that encoding and convert it to UTF-8,
    //  2. convert the result back and forth once more; the candidate only scores
    //     when this round trip reproduces the UTF-8 text exactly,
    //  3. add one point for every key of count_chars(<round-tripped text>) that a strict
    //     in_array() finds among the entries of count_chars(<input>).
    $scoreByteOrder = static function (string $encoding) use ($str, &$strChars): int {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return $score;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        return $score;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score++;
        }
      }

      return $score;
    };

    $maybeUTF32LE = $scoreByteOrder('UTF-32LE');
    $maybeUTF32BE = $scoreByteOrder('UTF-32BE');

    // A tie (including 0:0) means the byte order cannot be decided.
    if ($maybeUTF32BE === $maybeUTF32LE) {
      return false;
    }

    // 1 = little endian wins, 2 = big endian wins.
    return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
  }
3396
3397
  /**
3398
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3399
   *
3400
   * @see    http://hsivonen.iki.fi/php-utf8/
3401
   *
3402
   * @param string|string[] $str    <p>The string to be checked.</p>
3403
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3404
   *
3405
   * @return bool
3406
   */
3407 107
  public static function is_utf8($str, bool $strict = false): bool
3408
  {
3409 107
    if (\is_array($str) === true) {
3410 2
      foreach ($str as $k => $v) {
3411 2
        if (false === self::is_utf8($v, $strict)) {
3412 2
          return false;
3413
        }
3414
      }
3415
3416
      return true;
3417
    }
3418
3419 107
    if ('' === $str) {
3420 12
      return true;
3421
    }
3422
3423 103
    if ($strict === true) {
3424 2
      if (self::is_utf16($str) !== false) {
3425 2
        return false;
3426
      }
3427
3428
      if (self::is_utf32($str) !== false) {
3429
        return false;
3430
      }
3431
    }
3432
3433 103
    if (self::pcre_utf8_support() !== true) {
3434
3435
      // If even just the first character can be matched, when the /u
3436
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3437
      // invalid, nothing at all will match, even if the string contains
3438
      // some valid sequences
3439
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3440
    }
3441
3442 103
    $mState = 0; // cached expected number of octets after the current octet
3443
    // until the beginning of the next UTF8 character sequence
3444 103
    $mUcs4 = 0; // cached Unicode character
3445 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3446
3447 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3448
      self::checkForSupport();
3449
    }
3450
3451 103
    if (self::$ORD === null) {
3452
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3453
    }
3454
3455 103
    $len = self::strlen_in_byte((string)$str);
3456
    /** @noinspection ForeachInvariantsInspection */
3457 103
    for ($i = 0; $i < $len; $i++) {
3458 103
      $in = self::$ORD[$str[$i]];
3459 103
      if ($mState === 0) {
3460
        // When mState is zero we expect either a US-ASCII character or a
3461
        // multi-octet sequence.
3462 103
        if (0 === (0x80 & $in)) {
3463
          // US-ASCII, pass straight through.
3464 98
          $mBytes = 1;
3465 84
        } elseif (0xC0 === (0xE0 & $in)) {
3466
          // First octet of 2 octet sequence.
3467 75
          $mUcs4 = $in;
3468 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3469 75
          $mState = 1;
3470 75
          $mBytes = 2;
3471 58
        } elseif (0xE0 === (0xF0 & $in)) {
3472
          // First octet of 3 octet sequence.
3473 41
          $mUcs4 = $in;
3474 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3475 41
          $mState = 2;
3476 41
          $mBytes = 3;
3477 30
        } elseif (0xF0 === (0xF8 & $in)) {
3478
          // First octet of 4 octet sequence.
3479 19
          $mUcs4 = $in;
3480 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3481 19
          $mState = 3;
3482 19
          $mBytes = 4;
3483 13
        } elseif (0xF8 === (0xFC & $in)) {
3484
          /* First octet of 5 octet sequence.
3485
          *
3486
          * This is illegal because the encoded codepoint must be either
3487
          * (a) not the shortest form or
3488
          * (b) outside the Unicode range of 0-0x10FFFF.
3489
          * Rather than trying to resynchronize, we will carry on until the end
3490
          * of the sequence and let the later error handling code catch it.
3491
          */
3492 5
          $mUcs4 = $in;
3493 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3494 5
          $mState = 4;
3495 5
          $mBytes = 5;
3496 10
        } elseif (0xFC === (0xFE & $in)) {
3497
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3498 5
          $mUcs4 = $in;
3499 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3500 5
          $mState = 5;
3501 5
          $mBytes = 6;
3502
        } else {
3503
          // Current octet is neither in the US-ASCII range nor a legal first
3504
          // octet of a multi-octet sequence.
3505 103
          return false;
3506
        }
3507
      } else {
3508
        // When mState is non-zero, we expect a continuation of the multi-octet
3509
        // sequence
3510 84
        if (0x80 === (0xC0 & $in)) {
3511
          // Legal continuation.
3512 76
          $shift = ($mState - 1) * 6;
3513 76
          $tmp = $in;
3514 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3515 76
          $mUcs4 |= $tmp;
3516
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3517
          // Unicode code point to be output.
3518 76
          if (0 === --$mState) {
3519
            // Check for illegal sequences and code points.
3520
            //
3521
            // From Unicode 3.1, non-shortest form is illegal
3522
            if (
3523 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3524
                ||
3525 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3526
                ||
3527 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3528
                ||
3529 76
                (4 < $mBytes)
3530
                ||
3531
                // From Unicode 3.2, surrogate characters are illegal.
3532 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3533
                ||
3534
                // Code points outside the Unicode range are illegal.
3535 76
                ($mUcs4 > 0x10FFFF)
3536
            ) {
3537 8
              return false;
3538
            }
3539
            // initialize UTF8 cache
3540 76
            $mState = 0;
3541 76
            $mUcs4 = 0;
3542 76
            $mBytes = 1;
3543
          }
3544
        } else {
3545
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3546
          // Incomplete multi-octet sequence.
3547 36
          return false;
3548
        }
3549
      }
3550
    }
3551
3552 67
    return true;
3553
  }
3554
3555
  /**
   * Decodes a JSON string into the corresponding PHP value.
   *
   * The input is first passed through self::filter(); the filtered value is
   * then handed to PHP's native \json_decode() together with the remaining
   * arguments.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded (expected to be UTF-8 encoded).</p>
   * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects are converted into
   *                        associative arrays.</p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged
   *                        to \json_decode().</p>
   *
   * @throws \RuntimeException if self::$SUPPORT['json'] is <b>FALSE</b>
   *
   * @return mixed
   *               The value encoded in <i>json</i> in the appropriate PHP type;
   *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the
   *               encoded data is deeper than the recursion limit.
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // The support table is only consulted after checkForSupport() had a chance to run.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $jsonSupport = self::$SUPPORT['json'];
    if ($jsonSupport === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3607
3608
  /**
   * Returns the JSON representation of a value.
   *
   * The value is first passed through self::filter(); the filtered value is
   * then handed to PHP's native \json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource;
   *                       all string data is expected to be UTF-8 encoded.</p>
   * @param int   $options [optional] <p>Bitmask of JSON encode constants (e.g. <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to \json_encode().</p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @throws \RuntimeException if self::$SUPPORT['json'] is <b>FALSE</b>
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filteredValue = self::filter($value);

    // The support table is only consulted after checkForSupport() had a chance to run.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $jsonSupport = self::$SUPPORT['json'];
    if ($jsonSupport === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3663
3664
  /**
   * Reports whether the JSON functions can be used, determined by probing
   * for the global function "json_decode".
   *
   * @return bool
   *              <strong>true</strong> if "json_decode" exists, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $probeFunction = 'json_decode';

    return \function_exists($probeFunction);
  }
3674
3675
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The lower-cased first character followed by the remainder of the string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; self::substr() may hand back false.
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $loweredFirstChar = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $loweredFirstChar . ($remainder === false ? '' : $remainder);
  }
3699
3700
  /**
   * Alias of "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset forwarded to UTF8::lcfirst().</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean-up flag forwarded to UTF8::lcfirst().</p>
   *
   * @return string The result of UTF8::lcfirst().
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3715
3716
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is split with self::str_to_words(), each non-empty chunk that
   * is not listed in $exceptions is passed through self::lcfirst(), and the
   * chunks are concatenated again in their original order.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (exact, type-strict match).</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare with '' explicitly: the string "0" is falsy in PHP but is valid input.
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = $exceptions !== [];
    $result = '';

    foreach ($words as $word) {
      // Skip only truly empty chunks; a chunk consisting of "0" must be kept.
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $result .= $word;
    }

    return $result;
  }
3765
3766
  /**
   * Alias of "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset forwarded to UTF8::lcfirst().</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean-up flag forwarded to UTF8::lcfirst().</p>
   *
   * @return string The result of UTF8::lcfirst().
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3781
3782
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without a usable $chars value, leading characters of the Unicode
   * categories "Z" (separators) and "C" (other / control) are removed.
   * Otherwise $chars is escaped with \preg_quote() and used as a regex
   * character class.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped. INF (the default) or any empty
   *                      value except the string "0" selects the default whitespace pattern.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // The string "0" is falsy in PHP, but it is a legitimate character list
    // (e.g. stripping leading zeros), so it must not select the default pattern.
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = '^[\pZ\pC]+';
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = '^[' . $chars . ']+';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3806
3807
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is joined into one string before the code points are
   * collected via self::codepoints().
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, or null when no code points were found.
   */
  public static function max($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepointList = self::codepoints($input, false);
    if (0 === \count($codepointList)) {
      return null;
    }

    return self::chr(\max($codepointList));
  }
3829
3830
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The per-character sizes come from self::chr_size_list().
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int The largest size in the list, or 0 when the list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizeList = self::chr_size_list($str);

    // Guard first: \max() must not be called on an empty list.
    if (\count($sizeList) === 0) {
      return 0;
    }

    return (int)\max($sizeList);
  }
3847
3848
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal
   * encoding is switched to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3864
3865
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * The result is true only when the constant MB_OVERLOAD_STRING exists and
   * its bit is set in the "mbstring.func_overload" INI value.
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // Without the constant there is nothing to test the INI value against.
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3882
3883
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is joined into one string before the code points are
   * collected via self::codepoints().
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, or null when no code points were found.
   */
  public static function min($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepointList = self::codepoints($input, false);
    if (0 === \count($codepointList)) {
      return null;
    }

    return self::chr(\min($codepointList));
  }
3905
3906
  /**
   * Alias of "UTF8::normalize_encoding()": forwards both arguments unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>Encoding name forwarded to UTF8::normalize_encoding().</p>
   * @param mixed $fallback <p>Fallback value forwarded to UTF8::normalize_encoding().</p>
   *
   * @return mixed The result of UTF8::normalize_encoding().
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3922
3923
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order:
   *  1. empty input, "0" or "1"               => $fallback
   *  2. exact "UTF-8" / "UTF8"                => "UTF-8"
   *  3. exact "8BIT" / "BINARY"               => "CP850"
   *  4. exact "HTML" / "HTML-ENTITIES"        => "HTML-ENTITIES"
   *  5. a previously cached result for the same spelling
   *  6. a name contained in the "encodings" data list => returned unchanged
   *  7. otherwise the upper-cased name, mapped through the alias table below
   *     when its punctuation-free form is a known alias.
   *
   * Results of steps 6 and 7 are cached per input spelling for the lifetime of the process.
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        $encoding === ''
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (
        '8BIT' === $encoding
        ||
        'BINARY' === $encoding
    ) {
      return 'CP850';
    }

    if (
        'HTML' === $encoding
        ||
        'HTML-ENTITIES' === $encoding
    ) {
      return 'HTML-ENTITIES';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // Static analysis reports that getData() can also yield false, so the list
    // is only searched when it really is an array.
    if (
        \is_array(self::$ENCODINGS)
        &&
        \in_array($encoding, self::$ENCODINGS, true)
    ) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Alias lookup key: the upper-cased name without punctuation (whitespace is kept).
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the caller's original spelling so the next call short-circuits.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
4064
4065
  /**
   * Standardize line ending to unix-like.
   *
   * Every "\r\n" pair and every remaining lone "\r" is replaced by "\n".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with "\n" as the only line ending.
   */
  public static function normalize_line_ending(string $str): string
  {
    // Order matters: "\r\n" has to be handled before the lone "\r".
    $foreignLineEndings = ["\r\n", "\r"];
    $normalized = \str_replace($foreignLineEndings, "\n", $str);

    return (string)$normalized;
  }
4076
4077
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement table is taken from the "utf8_msword" data set; its keys
   * are searched for and replaced by the corresponding values. Keys and values
   * are split once and cached in static variables for subsequent calls.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      // Static analysis reports that getData() can also yield false; fall back
      // to an empty table so array_keys() / array_values() always get an array
      // and the input is then returned unchanged.
      $replacementTable = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacementTable);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacementTable);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4105
4106
  /**
   * Normalize the whitespace.
   *
   * Every character listed in self::$WHITESPACE_TABLE is replaced by a plain
   * space; unless requested otherwise, the characters listed in
   * self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ('' === $str) {
      return '';
    }

    // one cached search list per "keep non-breaking space" variant
    $cacheKey = $keepNonBreakingSpace ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if (!$keepBidiUnicodeControls) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4148
4149
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per (input, encoding) pair. \IntlChar::ord() is used
   * when available; otherwise the first (up to four) bytes are decoded by hand.
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input.
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache key is built from the input as it was passed in
    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $codePoint = \IntlChar::ord($chr);
      if ($codePoint) {
        $CHAR_CACHE[$cacheKey] = $codePoint;

        return $codePoint;
      }
    }

    // manual decoding: unpack() yields a 1-based array of byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $result = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $result = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $result = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode)
      $result = $lead;
    }

    $CHAR_CACHE[$cacheKey] = (int)$result;

    return $CHAR_CACHE[$cacheKey];
  }
4218
4219
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope;
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without "mbstring" fall back to the native parser
    if (self::$SUPPORT['mbstring'] !== true) {
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4255
4256
  /**
   * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern matches the empty subject (1); a failing call yields false
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return $matched === 1;
  }
4268
4269
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits, hexadecimal
   * digits, otherwise a character whose code point is taken via UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[]
   *                  The characters of the range, or an empty array if a
   *                  boundary is empty or resolves to code point 0.
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Resolve one boundary to a code point. The value is cast to string before
    // every ctype_*() call: non-string arguments are deprecated there and
    // integers would be interpreted as ASCII values.
    $toCodePoint = static function ($boundary): int {
      $asString = (string)$boundary;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($asString)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($asString)) {
        return (int)self::hex_to_int($asString);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4325
4326
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal html entities first
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // repeat the decoding until a pass no longer changes the string
    do {
      $before = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return $str;
  }
4374
4375
  /**
   * Filter a list of strings.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int   $removeShortValues [optional] <p>Drop values whose length is less than or equal to this
   *                                 number; null disables the check.</p>
   *
   * @return array The remaining values, re-indexed from 0.
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true
                 &&
                 \trim($candidate) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
4409
4410
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The "u" modifier is always added to the pattern.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern.</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to "ms"
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback for an empty delimiter
    $delim = $delimiter ?: '/';

    $regex = $delim . $pattern . $delim . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4440
4441
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The result of UTF8::remove_bom().
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4456
4457
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM is tested against the start of the string, so
   * more than one leading BOM can be stripped in a single call.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at the very beginning is of interest
      if (self::strpos_in_byte($str, $bomString, 0) !== 0) {
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
      if ($stripped === false) {
        return '';
      }

      $remainingBytes -= $bomByteLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4486
4487
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Consecutive repetitions of each search string are collapsed into one.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) ? [$what] : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (!\is_array($needles)) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $repeated = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = (string)\preg_replace($repeated, $needle, $str);
    }

    return $str;
  }
4510
4511
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
   *                              Default: '' (strip every tag)</p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $stripped = \strip_tags($str, $allowableTags);

    return $stripped;
  }
4525
4526
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as ONE break, so it is replaced only once.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // NOTE: the first alternative used to be "/\r\n" (with a stray slash), which
    // deleted a real "/" in front of a CRLF and let CRLF be replaced twice.
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4538
4539
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00", "%1F", ...) of these
   *                            characters.</p>
   * @param string $replacement [optional] <p>The string that is inserted for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits in percent-encoding may be upper or lower case -> "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because a removal can join the surrounding characters into a new match
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4572
4573
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // skip as many characters as the prefix is long and keep the rest
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4596
4597
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (self::str_ends_with($str, $substring)) {

      // strlen() may return false; cast so the subtraction always works on integers
      $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

      // the lengths are measured in $encoding, so the cut must use $encoding too
      // (same as in "remove_left()")
      return (string)self::substr(
          $str,
          0,
          $keepLength,
          $encoding
      );
    }

    return $str;
  }
4619
4620
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4638
4639
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4657
4658
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is passed to mbstring as 'none'
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      // (the previous substitute character is restored right after the conversion)
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);

      $str = \is_string($converted) ? $converted : '';
    }

    $search = [
        "\xEF\xBF\xBD",
        '�',
    ];
    $replace = [
        $replacementChar,
        $replacementChar,
    ];

    return \str_replace($search, $replace, $str);
  }
4714
4715
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (control / other) are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" / 0 is a valid character list, so do not use a plain truthiness check here
    $useDefault = $chars === INF
                  ||
                  !\is_scalar($chars)
                  ||
                  (string)$chars === '';

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefault) {
      $pattern = '[\pZ\pC]+$';
    } else {
      $pattern = '[' . \preg_quote((string)$chars, '/') . ']+$';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4739
4740
  /**
   * Build a regular expression fragment that matches any of the characters of $s.
   *
   * Short chunks are collected in one character class "[...]"; chunks that do
   * not fit into a character class become alternatives of a non-capturing group.
   * Results are cached per ($s, $class) pair.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the character class content, further entries are alternatives
    $alternatives = [$class];

    foreach (self::str_split($s) as $chunk) {
      if ('-' === $chunk) {
        // a dash goes to the front of the class
        $alternatives[0] = '-' . $alternatives[0];
      } elseif (!isset($chunk[2])) {
        // at most two bytes long
        $alternatives[0] .= \preg_quote($chunk, '/');
      } elseif (1 === self::strlen($chunk)) {
        $alternatives[0] .= $chunk;
      } else {
        $alternatives[] = $chunk;
      }
    }

    if ($alternatives[0]) {
      $alternatives[0] = '[' . $alternatives[0] . ']';
    }

    $return = (1 === \count($alternatives))
        ? $alternatives[0]
        : '(?:' . \implode('|', $alternatives) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4788
4789
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of self::$SUPPORT as "key - value" inside a <pre> block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $output .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4804
4805
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    // ASCII input is passed through untouched on request
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4834
4835
  /**
   * Replace every run of $tabLength spaces by one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4845
4846
  /**
   * Convert a string to an array of Unicode characters.
   *
   * If an array is given, every element is split on its own (with the same
   * $length and $cleanUtf8 options) and the array of results is returned.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // forward ALL options, otherwise nested values would never be cleaned
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the bytes and group them by the length announced in the lead byte;
      // sequences with a wrong continuation byte are not added to the result

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 1 byte (ASCII)
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 2 bytes
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 3 bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 4 bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue $length characters together per element
      $ret = \array_chunk($ret, $length);

      return \array_map(
          function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
4978
4979
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // drops the separator(s) and upper-cases the character that follows them (if any)
    $upperAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };

    // upper-cases a run of digits together with the character that follows it
    $upperAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    $str = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are removed
    $str = (string)\preg_replace('/^[-_]+/', '', $str);

    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);
  }
5016
5017
  /**
   * Capitalizes the first letter of every word of a personal name, except for
   * name particles which should stay lower-case.
   *
   * Whitespace is collapsed first; afterwards the name is processed once per
   * supported delimiter (space, then dash).
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $name = self::collapse_whitespace($str);

    // first pass: space separated parts, second pass: hyphenated parts
    foreach ([' ', '-'] as $delimiter) {
      $name = self::str_capitalize_name_helper($name, $delimiter);
    }

    return $name;
  }
5034
5035
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names by $delimiter and upper-cases the first character of every part, unless
   * the part is a known lower-case particle (e.g. "von", "de") or starts with a known
   * prefix (e.g. "mc", "d'"). For the "-" delimiter, parts that merely start with a
   * particle are skipped as well.
   *
   * @param string $names     <p>The (whitespace-normalized) name(s).</p>
   * @param string $delimiter <p>The separator between the name parts, e.g. " " or "-".</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // init
    $namesArray = \explode($delimiter, $names);

    if ($namesArray === false) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // The list of beginnings that prevent capitalization does not depend on the
    // single name, so it is built once instead of once per loop iteration.
    $blockingBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $blockingBeginnings = \array_merge($specialCases['names'], $blockingBeginnings);
    }

    // Iterate by key instead of by reference, so no dangling reference is left behind.
    foreach ($namesArray as $key => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $skip = false;
      foreach ($blockingBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $skip = true;

          // one hit is enough, the remaining beginnings cannot change the result
          break;
        }
      }

      if ($skip) {
        continue;
      }

      $namesArray[$key] = self::str_upper_first($name);
    }

    return \implode($delimiter, $namesArray);
  }
5123
5124
  /**
   * Checks whether $haystack contains $needle.
   *
   * The comparison is case-sensitive by default and becomes case-insensitive when
   * $caseSensitive is false. An empty $haystack or $needle always yields false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param string      $needle        <p>Substring to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is used as $encoding (backward-compatibility
   *                                   fallback).</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to keep the api backward compatible:
    // a non-boolean third argument is taken as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5153
5154
  /**
   * Checks whether $haystack contains every entry of $needles.
   *
   * The comparison is case-sensitive by default and becomes case-insensitive when
   * $caseSensitive is false. An empty $haystack or an empty $needles list yields false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param array       $needles       <p>SubStrings to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is used as $encoding (backward-compatibility
   *                                   fallback).</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || \count($needles) === 0) {
      return false;
    }

    // only a fallback to keep the api backward compatible:
    // a non-boolean third argument is taken as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        // the first missing needle decides the result
        return false;
      }
    }

    return true;
  }
5189
5190
  /**
   * Checks whether $haystack contains at least one entry of $needles.
   *
   * The comparison is case-sensitive by default and becomes case-insensitive when
   * $caseSensitive is false. An empty $needles list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (\count($needles) === 0) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        // the first hit decides the result
        return true;
      }
    }

    return false;
  }
5217
5218
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * This is a shortcut for "str_delimit()" with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dashed = self::str_delimit($str, '-', $encoding);

    return $dashed;
  }
5232
5233
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * The delimiter is inserted literally: sequences like "$0" or "\1" inside
   * $delimiter are not treated as regex backreferences.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every (ASCII) uppercase letter inside a word with a leading dash
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    // lower-case before the delimiter is inserted, so the delimiter keeps its case
    $str = self::strtolower($str, $encoding);

    // A callback is used instead of a replacement string, because "preg_replace()"
    // would expand "$n" / "\n" references that happen to be part of the delimiter.
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function () use ($delimiter) {
          return $delimiter;
        },
        $str
    );
  }
5255
5256
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
   * "mb_detect_encoding()" (if mbstring is available) and finally a round-trip
   * through "iconv()" for every known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // every detector is called only once and its result is reused
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // "getData()" may fail; store an empty list then, so the property stays an array
      $encodingsTmp = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodingsTmp) ? $encodingsTmp : [];
    }

    // guard against a non-array value that was stored by other code paths
    $encodings = \is_array(self::$ENCODINGS) ? self::$ENCODINGS : [];

    foreach ($encodings as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5382
5383
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is case-sensitive and byte-wise. An empty $haystack or an
   * empty $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '') {
      return false;
    }

    if ($needle === '') {
      return false;
    }

    // cut as many bytes from the end as the needle is long and compare them
    $needleLength = \strlen($needle);
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
5399
5400
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   * - an empty list of $substrings yields false
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if (\count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_ends_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5424
5425
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring) ? $str : $substring . $str;
  }
5442
5443
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring) ? $str : $str . $substring;
  }
5459
5460
  /**
   * Capitalizes the first word of the string, replaces underscores with
   * spaces, and strips '_id'.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // "_id" is listed first, so it is stripped before the remaining underscores become spaces
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $str = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    $trimmed = self::trim($str);

    return self::ucfirst($trimmed);
  }
5484
5485
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // NOTE(review): the tail is cut byte-wise with the byte length of the needle; this presumably
    // assumes that both case variants have the same byte length - confirm for exotic characters.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5505
5506
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   * - an empty list of $substrings yields false
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if (\count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_iends_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5530
5531
  /**
   * Returns the index of the first occurrence of $needle in the string
   * (case-insensitive), and false if not found. Accepts an optional offset
   * from which to begin the search.
   *
   * This is a thin wrapper around "stripos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5553
5554
  /**
   * Returns the index of the last occurrence of $needle in the string
   * (case-insensitive), and false if not found. Accepts an optional offset
   * from which to begin the search. Offsets may be negative to count from
   * the last character in the string.
   *
   * This is a thin wrapper around "strripos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5577
5578
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * This is a thin wrapper around "strpos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5600
5601
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * This is a thin wrapper around "strrpos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5624
5625
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged if $index is greater than the length of
   * the string, or if the length of the string cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // "strlen()" is reported to return false in some cases; never pass that on as a length
    if ($len === false || $index > $len) {
      return $str;
    }

    // "substr()" is reported to return false in some cases; the explicit casts make the
    // fallback to an empty string visible instead of relying on implicit conversion
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5648
5649
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The replacement text is inserted literally: sequences like "$0" or "\1"
   * are not treated as regex backreferences. An empty search string never
   * matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value(s) to search for. Every replacement with
   *                       search array is performed on the result of previous
   *                       replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), paired with $search in order.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive, unicode-aware pattern per search value
    $patterns = [];
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match, so empty search strings are ignored
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" expands "$n" and "\n" in the replacement text; escape "\" and "$",
    // so the replacement is inserted literally, like "str_ireplace()" does
    $escapeReplacement = static function ($value) {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };

    if (\is_array($replace)) {
      $replacement = \array_map($escapeReplacement, $replace);
    } else {
      $replacement = $escapeReplacement($replace);
    }

    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5692
5693
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix if its first case-insensitive occurrence is at index 0
    return self::stripos($haystack, $needle) === 0;
  }
5713
5714
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   * - an empty $str or an empty list of $substrings yields false
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || \count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_istarts_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5742
5743
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the length and the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5774
5775
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the length and the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5806
5807
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5833
5834
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5860
5861
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * The lookup is delegated to "stristr()" of this class. An empty string is
   * returned if $str or $needle is empty, or if the lookup fails.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5893
5894
  /**
   * Returns the part of "$str" that "self::strrichr()" selects for the last occurrence of "$needle".
   *
   * All arguments are forwarded unchanged to "self::strrichr()"; a "false" result
   * is converted into an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The selected part, or an empty string if "$str" or "$needle" is empty
   *                or nothing was found.</p>
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5921
5922
  /**
   * Returns the trailing "$n" characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The trailing characters, or an empty string if "$n" is not positive
   *                or "self::substr()" reports a failure.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      // A negative start offset counts from the end; a "null" length means "up to the end".
      $tail = self::substr($str, -$n, null, $encoding);
      if ($tail !== false) {
        return $tail;
      }
    }

    return '';
  }
5941
5942
  /**
   * Limit the number of characters in a string.
   *
   * A string of at most "$length" characters is returned unchanged. A longer string
   * is cut to ("$length" minus the length of "$strAddOn") characters and "$strAddOn"
   * is appended.
   *
   * NOTE: if "$strAddOn" is longer than "$length", the computed length is negative and is
   * handed to "self::substr()" as-is.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The limited string; an empty string if "$str" is empty or "$length" is not positive.</p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Reserve room for the add-on, measured in the same encoding as the input.
    $keep = $length - (int)self::strlen($strAddOn, $encoding);

    // substr() may report "false"; cast so the concatenation always works on a string.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5968
5969
  /**
   * Limit the number of characters in a string without cutting through a word.
   *
   * - A string of at most "$length" characters is returned unchanged.
   * - If the character at position ("$length" - 1) is a space, the first
   *   ("$length" - 1) characters plus "$strAddOn" are returned.
   * - Otherwise the string is cut to "$length" characters and everything from the
   *   last space onwards is dropped before "$strAddOn" is appended. If nothing is left
   *   (no space, or only a leading one), the first ("$length" - 1) characters plus
   *   "$strAddOn" are returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The limited string; an empty string if "$str" is empty or "$length" is not positive.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The cut falls exactly behind a word: drop the space and append the add-on.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length, $encoding);

    // Everything in front of the last space, i.e. without the (possibly partial) last word.
    $lastSpace = \strrpos($truncated, ' ');
    $withoutLastWord = ($lastSpace === false) ? '' : \substr($truncated, 0, $lastSpace);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
6010
6011
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * The strings are compared character by character (strictly, as strings)
   * until the first difference.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The common prefix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlapping range of both strings can be part of a common prefix.
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; ++$i) {
      // substr() may report "false"; cast so that the comparison and the
      // concatenation always work on strings.
      $char = (string)self::substr($str, $i, 1, $encoding);

      if ($char !== (string)self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
6037
6038
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first in "$str".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if either input is empty
   *                or nothing is shared.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of "$otherStr" once instead of once per table cell.
    $otherChars = [];
    for ($j = 0; $j < $otherLength; ++$j) {
      $otherChars[$j] = (string)self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // (exclusive) end index of that match within "$str"

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // cells that are never written keep their initial 0.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; ++$i) {
      // Does not depend on "$j", so it is fetched once per row.
      $strChar = (string)self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; ++$j) {
        if ($strChar !== $otherChars[$j - 1]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // Strictly greater: on ties the earliest match wins.
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6089
6090
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * The strings are compared character by character (strictly, as strings),
   * starting at the end, until the first difference.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The common suffix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlapping range of both strings can be part of a common suffix.
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; ++$i) {
      // Negative offsets count from the end of the string. substr() may report
      // "false"; cast so that comparison and concatenation always work on strings.
      $char = (string)self::substr($str, -$i, 1, $encoding);

      if ($char !== (string)self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      // Walking backwards, so every new character goes in front.
      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
6116
6117
  /**
   * Checks whether "$str" matches the supplied regular expression.
   *
   * The pattern is used as '/' . $pattern . '/u', i.e. it is passed without delimiters
   * and is always evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
   *
   * @return bool <p>True only if preg_match() reports a match; false for "no match" and for a preg error.</p>
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 (match), 0 (no match) or false (error).
    return \preg_match('/' . $pattern . '/u', $str) === 1;
  }
6133
6134
  /**
   * Checks whether a character exists at the given offset.
   *
   * Negative offsets count from the last character of the string
   * (-1 is the last character).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    if ($offset < 0) {
      // Counting from the end: -$length is the first character.
      return $length >= \abs($offset);
    }

    // Counting from the start: valid indexes are 0 .. $length - 1.
    return $length > $offset;
  }
6157
6158
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Throws an
   * OutOfBoundsException if the index does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Measure the length in the same encoding that is used to fetch the character,
    // otherwise the bounds check and char_at() can disagree.
    $length = self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6187
6188
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * NOTE: an empty "$str" is returned as an empty string (it is not padded), and an
   * empty "$pad_string" leaves "$str" unchanged.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The padded string; "$str" itself if "$pad_length" is not positive
   *                or smaller than the length of "$str".</p>
   *
   * @throws \InvalidArgumentException If "$pad_type" is neither an integer nor one of the supported strings.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the string aliases into the native STR_PAD_* constants.
    // (The loose comparisons are kept on purpose so that existing callers see no change.)
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = (int)self::strlen($pad_string, $encoding);

      // Nothing can be filled with an empty padding string; bail out before
      // its length is used as a divisor below.
      if ($ps_length === 0) {
        return $str;
      }

      // Number of characters that are missing to reach "$pad_length".
      $diff = ($pad_length - $str_length);

      switch ($pad_type) {
        case STR_PAD_LEFT:
          // Repeat the padding often enough, then cut it to the exact size.
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff, $encoding);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // The left side gets the smaller half, the right side the bigger one.
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff, $encoding);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
6259
6260
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded.
   *
   * The missing length is split in two halves: the left side receives the
   * rounded-down half, the right side the rounded-up half. The padding itself
   * is done by "self::apply_padding()".
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6277
6278
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. The padding itself is done by "self::apply_padding()".
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the requested encoding (as str_pad_both() does), so the
    // amount of padding matches the encoding that is used to apply it.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6293
6294
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. The padding itself is done by "self::apply_padding()".
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the input in the requested encoding (as str_pad_both() does), so the
    // amount of padding matches the encoding that is used to apply it.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6309
6310
  /**
   * Repeat a string.
   *
   * The input is passed through "self::filter()" first; the filtered string is
   * then repeated with the native str_repeat().
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           The multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6334
6335
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for the native "str_replace()"; all arguments are
   *       forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // "$count" is a reference, so the native function fills the caller's variable.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6368
6369
  /**
   * Replaces "$search" with "$replacement" where it occurs at the beginning of the string.
   *
   * "$search" is quoted with preg_quote() (so it is matched literally) and anchored
   * with "^"; every backslash in "$replacement" is doubled before both are handed to
   * "self::regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = '^' . \preg_quote($search, '/');
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6386
6387
  /**
   * Replaces "$search" with "$replacement" where it occurs at the ending of the string.
   *
   * "$search" is quoted with preg_quote() (so it is matched literally) and anchored
   * with "$"; every backslash in "$replacement" is doubled before both are handed to
   * "self::regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = \preg_quote($search, '/') . '$';
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6404
6405
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * The position is looked up with "self::strpos()" and the replacement is done
   * by "self::substr_replace()".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The changed string, or "$subject" itself if "$search" was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstPosition = self::strpos($subject, $search);

    // Not found: there is nothing to replace.
    if ($firstPosition === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstPosition, $searchLength);
  }
6423
6424
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * The position is looked up with "self::strrpos()" and the replacement is done
   * by "self::substr_replace()".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The changed string, or "$subject" itself if "$search" was not found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $lastPosition = self::strrpos($subject, $search);

    // Not found: there is nothing to replace.
    if ($lastPosition === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $lastPosition, $searchLength);
  }
6442
6443
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string; an empty string for an empty input.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $length = (int)self::strlen($str);

    // range(0, -1) would yield [0, -1] instead of an empty list, so an
    // empty input is handled explicitly.
    if ($length === 0) {
      return '';
    }

    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // Rebuild the string by picking the characters in the shuffled order.
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= (string)self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6465
6466
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring, as returned by "self::substr()"; an empty string
   *                     if a non-negative "$end" is not greater than "$start".</p>
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    if ($end === null) {
      // No end given: the full length is more than enough to reach the end of the string.
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      // Negative end: translate it into a length, measured in the requested encoding.
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6495
6496
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Steps: whitespace is normalized via "self::normalize_whitespace()", "-" becomes "_",
   * every ASCII digit is wrapped in "_", every ASCII upper-case letter is replaced by
   * "_" plus its lower-case form, whitespace runs become "_", repeated "_" are collapsed
   * and leading / trailing "_" and whitespace are trimmed.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // Inside a character class "|" is a literal pipe (not an alternation),
        // so it must not be part of the class.
        '/([\dA-Z])/u',
        static function (array $matches) use ($encoding): string {
          $match = $matches[1];

          // A digit survives the int round-trip unchanged; separate it on both sides.
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          // An upper-case letter starts a new word.
          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6543
6544
  /**
   * Sort all characters according to code points.
   *
   * The string is turned into code points via "self::codepoints()", sorted and
   * converted back via "self::string()".
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice removes duplicated values (array keys are unique).
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6569
6570
  /**
   * Alias for "UTF8::split()": both arguments are forwarded unchanged.
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str
   * @param int             $len
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    $chunks = self::split($str, $len);

    return $chunks;
  }
6584
6585
  /**
   * Splits the string at every occurrence of "$pattern" and returns the pieces
   * as an array of strings. An optional integer $limit will truncate the results.
   *
   * NOTE: "$pattern" is quoted with preg_quote() before it is used, so it is matched
   *       literally and not interpreted as a regular expression.
   *
   * @param string $str
   * @param string $pattern <p>The separator with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] <p>An array of strings; an empty array if "$limit" is 0 or preg_split() fails,
   *                  and an array containing only "$str" if "$pattern" is empty.</p>
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // An empty pattern is never handed to preg_split(); the input is the only piece.
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the unsplit remainder into the last piece when a limit is
    // given, so one extra piece is requested and dropped again below.
    $pregLimit = ($limit > 0) ? $limit + 1 : -1;

    $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pregLimit);
    if ($pieces === false) {
      return [];
    }

    // Only when the limit was actually reached does the last piece hold the remainder.
    if ($pregLimit > 0 && \count($pieces) === $pregLimit) {
      \array_pop($pieces);
    }

    return $pieces;
  }
6628
6629
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is byte-wise and case-sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>True if "$haystack" begins with "$needle"; always false if either of them is empty.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // Compare only the leading bytes instead of searching the whole haystack
    // for the needle and checking the position afterwards.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6649
6650
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * Every candidate is checked with "self::str_starts_with()"; the first hit wins.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the substrings; always false
   *              for an empty "$str" or an empty list.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6678
6679
  /**
   * Returns everything that follows the first occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty, or when
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    // NOTE(review): $encoding is only used for strlen()/substr() below, it is
    // not forwarded to str_index_first() - confirm that this is intended.
    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // skip the separator itself
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6710
6711
  /**
   * Returns everything that follows the last occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty, or when
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    // NOTE(review): $encoding is only used for strlen()/substr() below, it is
    // not forwarded to str_index_last() - confirm that this is intended.
    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // skip the separator itself
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6742
6743
  /**
   * Returns everything that precedes the first occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty, or when
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    // NOTE(review): $encoding is only used for substr() below, it is not
    // forwarded to str_index_first() - confirm that this is intended.
    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // the separator position doubles as the length of the leading part
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6774
6775
  /**
   * Returns everything that precedes the last occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty, or when
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    // NOTE(review): $encoding is only used for substr() below, it is not
    // forwarded to str_index_last() - confirm that this is intended.
    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // the separator position doubles as the length of the leading part
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6806
6807
  /**
   * Gets the part of $str located via the first occurrence of "$needle",
   * as returned by UTF8::strstr() (the part before the needle when
   * "$beforeNeedle" is true).
   *
   * An empty string is returned when $str or $needle is empty, or when
   * UTF8::strstr() reports no match (false).
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // normalise the "not found" result to an empty string
    return $found === false ? '' : $found;
  }
6839
6840
  /**
   * Gets the part of $str located via the last occurrence of "$needle",
   * as returned by UTF8::strrchr() (the part before the needle when
   * "$beforeNeedle" is true).
   *
   * An empty string is returned when $str or $needle is empty, or when
   * UTF8::strrchr() reports no match (false).
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // normalise the "not found" result to an empty string
    return $found === false ? '' : $found;
  }
6867
6868
  /**
   * Wraps $str in $substring: the substring is put in front of and behind it.
   *
   * @param string $str
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6880
6881
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * A "word" is every run of non-whitespace characters. Words that are listed
   * in $ignore (strict comparison) are returned untouched; every other word is
   * lower-cased and then gets its first character upper-cased, both using
   * $encoding.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // forward $encoding to both steps, so that the upper-casing of the
          // first character uses the same charset as the lower-casing
          return self::str_upper_first(
              self::strtolower($match[0], $encoding),
              $encoding
          );
        },
        $str
    );
  }
6910
6911
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized. The entries are appended to the built-in list of small words
   * and are inserted into the regular expressions as-is (they are not quoted).
   *
   * If the trimmed input contains no lowercase character it is lower-cased
   * first (using $encoding), then the substitutions below are applied in order.
   *
   * Adapted from John Gruber's script.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // all-caps (or caseless) input: start from lowercase, honouring $encoding
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= static::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind below contains the character "…" while its
    // inline comment talks about a hyphen - confirm which one is intended.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7055
7056
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of $str are read as one big-endian number and written in base 2
   * without leading zeros; an empty string (or only NUL bytes) yields "0".
   *
   * The conversion is done byte by byte instead of via \base_convert(),
   * because \base_convert() loses precision on large numbers and would
   * therefore return wrong digits for longer inputs.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    $bits = '';
    $byteCount = \strlen($str);

    for ($i = 0; $i < $byteCount; ++$i) {
      // every byte contributes exactly eight binary digits
      $bits .= \sprintf('%08b', \ord($str[$i]));
    }

    // same shape as a plain base conversion: no leading zeros, but never empty
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7069
7070
  /**
   * Split a string into its lines.
   *
   * The separator pattern matches one or two consecutive CR / LF characters,
   * so e.g. "\r\n" - but also "\n\n" - is consumed as a single line break.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "nothing to split" and for a failed split
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ($str === '') {
      return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    // no filter requested: hand back the raw split result
    $filterRequested = ($removeShortValues !== null || $removeEmptyValues !== false);
    if (!$filterRequested) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
7105
7106
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the word pattern as a captured delimiter, so the
   * unfiltered result alternates between non-word parts (even indexes) and
   * words (odd indexes).
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "nothing to split" and for a failed split
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ($str === '') {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // no filter requested: hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    // make sure that every remaining entry is a string, keys are kept
    $words = [];
    foreach ($reduced as $key => $value) {
      $words[$key] = (string)$value;
    }

    return $words;
  }
7150
7151
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7166
7167
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * The input is returned unchanged when it is not longer than $length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // the parameter is untyped, so normalise it first
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // short enough already: nothing to cut
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // reserve room for the appended substring
    $keep = $length - self::strlen($substring, $encoding);

    return self::substr($str, 0, $keep, $encoding) . $substring;
  }
7200
7201
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * The input is returned unchanged when it is not longer than $length; an
   * empty string is returned when the initial cut fails.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // short enough already: nothing to cut
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // reserve room for the appended substring
    $limit = $length - self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $limit, $encoding);
    if ($head === false) {
      return '';
    }

    // position of the first space at / after the last kept character
    $nextSpacePos = self::strpos($str, ' ', $limit - 1, $encoding);

    // loose comparison kept on purpose of equivalence: "false" and 0 compare equal here
    if ($nextSpacePos != $limit) {
      // the cut went through a word: fall back to the last space within the kept part
      $lastSpacePos = self::strrpos($head, ' ', 0, $encoding);

      $spaceAvailable = ($lastSpacePos !== false || $nextSpacePos !== false);
      if ($spaceAvailable) {
        $head = self::substr($head, 0, (int)$lastSpacePos, $encoding);
      }
    }

    return $head . $substring;
  }
7244
7245
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * Implemented as UTF8::str_delimit() with "_" as the delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7259
7260
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * Implemented as UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7274
7275
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7290
7291
  /**
   * Counts number of words in the UTF-8 string.
   *
   * Works on the result of UTF8::str_to_words(), where the words are located
   * at the odd indexes and the text between them at the even indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $total = \count($parts);

    // plain list of words
    if ($format === 1) {
      $words = [];
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    // words, keyed by their offset within the string
    if ($format === 2) {
      $words = [];
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[$position] = $parts[$idx];
        // advance by the word itself and by the text that follows it
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // any other format: only the number of words
    return (int)(($total - 1) / 2);
  }
7334
7335
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp() - both strings are
   * case-folded via UTF8::strtocasefold() and then compared with UTF8::strcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7356
7357
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7374
7375
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings compare as equal right away; otherwise both strings are
   * normalized to NFD and compared byte-wise. If a string can not be
   * normalized (e.g. because it is not valid UTF-8) the raw string is
   * compared instead, so that the failure value of the normalizer is never
   * handed to \strcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // the normalizer reports a failure with a non-string value
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
7394
7395
  /**
   * Find length of initial segment not matching mask.
   *
   * Returns null when $charList is empty, when the requested slice of $str
   * can not be extracted, or when the (sliced) string is empty. If none of the
   * characters occurs, the length of the whole (sliced) string is returned.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // only look at the requested part of the string
    if ($offset !== 0 || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }
      $str = (string)$slice;
    }

    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
7429
7430
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7447
7448
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted via UTF8::chr() and the results are joined
   * without a separator.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $converter = [
        self::class,
        'chr',
    ];

    $chars = \array_map($converter, $array);

    return \implode('', $chars);
  }
7470
7471
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The keys of self::$BOM are used as the candidate byte sequences.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the keys are needed here, the mapped value is not used
    foreach (self::$BOM as $bom => $unusedByteLength) {
      $position = \strpos($str, $bom);
      if ($position === 0) {
        return true;
      }
    }

    return false;
  }
7490
7491
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // optionally run the input through UTF8::clean() before stripping
    $input = ($cleanUtf8 === true ? self::clean($str) : $str);

    return \strip_tags($input, $allowable_tags);
  }
7523
7524
  /**
   * Remove every run of whitespace from a string. This covers tabs and
   * newlines as well as multibyte whitespace such as the thin space
   * and the ideographic space.
   *
   * @param string $str
   *
   * @return string The input without any whitespace characters.
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on failure, the cast turns that into ''
    return (string)$stripped;
  }
7541
7542
  /**
   * Case-insensitive search for the first occurrence of a string within another.
   *
   * The lookup is attempted with mbstring, then intl (UTF-8 and non-negative
   * offset only), then the native byte function (pure ASCII input only) and
   * finally via case-folding both strings and delegating to UTF8::strpos().
   * A backend that reports "not found" does not end the search; the next
   * fallback is tried.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found
   *                   (also returned when haystack or needle is empty).
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // first choice: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // fallback via intl
    //

    $intlIsUsable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                    &&
                    $offset >= 0 // grapheme_stripos() can't handle negative offset
                    &&
                    self::$SUPPORT['intl'] === true;

    if ($intlIsUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: fold the case of both strings, then search case-sensitively
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7615
7616
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also returned when haystack or needle is empty).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the cleaning step removed everything.
    // Compare against '' explicitly: a truthiness check would also treat the
    // perfectly valid needle "0" as empty and hand back the whole haystack.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first match
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // strlen() is documented as int|false, but substr() only accepts an int offset
    // (strict_types is enabled), so normalise the value before passing it on
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7694
7695
  /**
   * Get the string length, not the byte-length!
   *
   * Backends are tried in this order: byte length (CP850 / ASCII encoding only),
   * mbstring, iconv, intl (UTF-8 only), native strlen() (pure ASCII input only)
   * and finally a regex based character count.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $noEncodingAwareBackend = self::$SUPPORT['mbstring'] === false
                              &&
                              self::$SUPPORT['iconv'] === false;

    if ($encoding !== 'UTF-8' && $noEncodingAwareBackend) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // fallback via vanilla php: count the matches of a "one character" pattern
    //

    \preg_match_all('/./us', $str, $chars);

    $length = \count($chars[0]);

    // nothing matched although the string has at least one byte => signal a failure
    if ($length === 0 && isset($str[0])) {
      return false;
    }

    return $length;
  }
7810
7811
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str
   *
   * @return int The number of bytes, 0 for an empty string.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
7835
7836
  /**
   * Case-insensitive "natural order" comparison of two strings.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are case-folded and then handed to UTF8::strnatcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
7857
7858
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7877
7878
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * Both strings are case-folded and then handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
7901
7902
  /**
   * Compare the first n characters of two strings.
   *
   * Each string is cut down to its first $len characters via UTF8::substr()
   * and the two prefixes are compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a failed substr() (false) into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7923
7924
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also returned when haystack or char_list is empty).
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // the character list is turned into a regex fragment by rxClass()
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the position of the first matched character
    $start = (int)\strpos($haystack, $hit[0]);

    return \substr($haystack, $start);
  }
7946
7947
  /**
   * Find position of first occurrence of string in a string.
   *
   * Backends are tried in this order: byte search (CP850 / ASCII encoding only),
   * mbstring, intl (UTF-8 and non-negative offset only), iconv (non-negative offset
   * only), native strpos() (pure ASCII input only) and finally a plain PHP fallback.
   * A backend that reports "not found" does not end the search; the next
   * fallback is tried.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found it returns false
   *                   (also returned when haystack or needle is empty).
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: search byte-wise in the part of the haystack that
    // starts at $offset, then convert the byte position back into a character position
    //

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTail = (string)$haystackTmp;

    $pos = \strpos($haystackTail, $needle);
    if ($pos === false) {
      return false;
    }

    // Character index (in the original haystack) at which $haystackTail begins.
    // A negative offset counts from the end of the haystack, so it has to be
    // translated into an absolute index; otherwise the result would be relative
    // to the tail instead of the haystack.
    $tailStart = $offset;
    if ($offset < 0) {
      $haystackLength = self::strlen($haystack, $encoding);
      $tailStart = $haystackLength === false ? 0 : \max(0, $haystackLength + $offset);
    }

    // strlen() may return false, the cast keeps the result an int;
    // for $pos === 0 the prefix is '' and nothing is added
    return $tailStart + (int)self::strlen(\substr($haystackTail, 0, $pos), $encoding);
  }
8097
8098
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to search for in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found, it returns false
   *                   (also returned when haystack or needle is empty).
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
8131
8132
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * With mbstring available the call is forwarded to mb_strrchr(). Without it,
   * only the first character of the needle is searched for (via iconv or
   * UTF8::strrpos()), except for the byte-wise CP850 / ASCII shortcut.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found
   *                      (also returned when haystack or needle is empty).
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($before_needle === false && $isByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // fallback via iconv or vanilla php: both look for the first character of the needle only
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8245
8246
  /**
   * Reverses characters order in the string.
   *
   * The string is rebuilt character by character (not byte by byte),
   * using UTF8::strlen() and UTF8::substr().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // strlen() is documented as int|false: normalise to int so that the counter is
    // never a bool (decrementing a bool is a no-op) and substr() always gets an int offset
    $length = (int)self::strlen($str);

    $reversed = '';
    for ($i = $length - 1; $i >= 0; --$i) {
      // walk from the last character to the first one
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
8267
8268
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * With mbstring available the call is forwarded to mb_strrichr(). Without it,
   * only the first character of the needle is searched for via UTF8::strripos().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found
   *                      (also returned when haystack or needle is empty).
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // first choice: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php: look for the first character of the needle only
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $lastPos = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8338
8339
  /**
   * Locate the last case-insensitive occurrence of a needle inside a haystack.
   *
   * @param string     $haystack  <p>The string that is searched.</p>
   * @param string|int $needle    <p>The string to search for; a non-negative int is converted via UTF8::chr() first.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>Returns false if haystack or needle is empty, or if needle is not found.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    $needle = (string)(((int)$needle === $needle && $needle >= 0) ? self::chr($needle) : $needle);
    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: delegate to the byte-wise variant
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // first choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // second choice: intl
    // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
    if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8' && $offset >= 0) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // third choice: byte-wise search, for ascii only input
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    // last resort: case-fold both strings and run the case-sensitive search
    return self::strrpos(
        self::strtocasefold($haystack, true, false, $encoding),
        self::strtocasefold($needle, true, false, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8444
8445
  /**
   * Byte-wise, case-insensitive search for the last occurrence of a string within another.
   *
   * @param string $haystack <p>The string that is searched.</p>
   * @param string $needle   <p>The string to find in haystack.</p>
   * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
  }
8480
8481
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <strong>null</strong> is treated like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // "no offset" means "search the whole string": normalise it once, so that the
    // int-only helpers and extension functions below never receive null
    $offset = $offset ?? 0;

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // cut the haystack down to the part that may be searched:
    // - positive offset: drop the leading characters, their count is added back to the result
    // - negative offset: drop the trailing characters, positions stay relative to the start
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      // a failed substr() (false) becomes '' here, which ends in "not found" below
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte position: convert it by measuring the length of the prefix in front of the match
    return $offset + self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8616
8617
  /**
   * Byte-wise search for the last occurrence of a string in a string.
   *
   * @param string $haystack <p>The string being checked, for the last occurrence of needle.</p>
   * @param string $needle   <p>The string to find in haystack.</p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string.
   *                   Returns false if needle is not found or one of the strings is empty.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
  }
8651
8652
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start of the part of $str that is examined.</p>
   * @param int|null $length [optional] <p>Length of the part of $str that is examined.</p>
   *
   * @return int <p>Length of the leading segment, 0 if $str or $mask is empty or nothing matches.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      // a failed substr() (false) is treated like an empty string
      $str = (string)self::substr($str, $offset, $length);
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the subject
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // strlen() may report false, but the declared return type only allows int
    return (int)self::strlen($matches[0]);
  }
8679
8680
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found
   *                       or one of the strings is empty.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // group 1: everything in front of the first needle (lazy),
    // group 2: the needle plus the rest of the haystack (greedy, "s" lets "." span newlines).
    // Capturing the tail directly avoids a second length / substring computation that can fail.
    $match = [];
    \preg_match('/^(.*?)(' . \preg_quote($needle, '/') . '.*)/us', $haystack, $match);

    if (!isset($match[2])) {
      return false;
    }

    return $before_needle ? $match[1] : $match[2];
  }
8783
8784
  /**
   * Byte-wise search for the first occurrence of a string within another.
   *
   * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end.
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found or one of the strings is empty.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
  }
8823
8824
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through UTF8::fixStrCaseHelper() and then converted
   * with UTF8::strtolower() or UTF8::strtoupper(), depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ('' === $str) {
      return '';
    }

    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict "true" selects the lowercase variant
    return $lower === true
        ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
  }
8863
8864
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // the transliterator failed (false): do not return false from a "string" method,
        // use the generic conversion below instead
      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8927
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The folded string; if the regex step fails, the string is returned without mark removal.</p>
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if (!\is_string($decomposed)) {
      // normalize() reports failure with a non-string value: continue with the raw input
      $decomposed = $str;
    }

    $stripped = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    // preg_replace() returns null on error, but the declared return type only allows string
    return $stripped ?? $decomposed;
  }
8939
8940
  /**
8941
   * Make a string uppercase.
8942
   *
8943
   * @link http://php.net/manual/en/function.mb-strtoupper.php
8944
   *
8945
   * @param string      $str                   <p>The string being uppercased.</p>
8946
   * @param string      $encoding              [optional] <p>Set the charset.</p>
8947
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
8948
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8949
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8950
   *
8951
   * @return string String with all alphabetic characters converted to uppercase.
8952
   */
8953 160
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
8954
  {
8955
    // init
8956 160
    $str = (string)$str;
8957
8958 160
    if ('' === $str) {
8959 4
      return '';
8960
    }
8961
8962 158
    if ($cleanUtf8 === true) {
8963
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
8964
      // if invalid characters are found in $haystack before $needle
8965 3
      $str = self::clean($str);
8966
    }
8967
8968 158
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
8969 72
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
8970
    }
8971
8972
    // hack for old php version or for the polyfill ...
8973 158
    if ($tryToKeepStringLength === true) {
8974 2
      $str = self::fixStrCaseHelper($str, false);
8975
    }
8976
8977 158
    if ($lang !== null) {
8978
8979 2
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
8980
        self::checkForSupport();
8981
      }
8982
8983 2
      if (self::$SUPPORT['intl'] === true) {
8984
8985 2
        $langCode = $lang . '-Upper';
8986 2
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
8987
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);
8988
8989
          $langCode = 'Any-Upper';
8990
        }
8991
8992
        /** @noinspection PhpComposerExtensionStubsInspection */
8993 2
        return transliterator_transliterate($langCode, $str);
8994
      }
8995
8996
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
8997
    }
8998
8999
    // always fallback via symfony polyfill
9000 158
    return \mb_strtoupper($str, $encoding);
9001
  }
9002
9003
  /**
   * Translate characters or replace sub-strings.
   *
   * With $to given, $from and $to are split via UTF8::str_split(), cut to the length of the
   * shorter one and used as a one-to-one replacement map.
   * Without $to, a string $from is removed from $str, any other $from is handed to \strtr()
   * as replacement pairs.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // identical source and target: nothing to translate
    if ($from === $to) {
      return $str;
    }

    // INF marks "no $to given"
    if (INF === $to) {
      return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    // only as many pairs as the shorter list provides
    $pairCount = \min(\count($fromChars), \count($toChars));
    $replacePairs = \array_combine(
        \array_slice($fromChars, 0, $pairCount),
        \array_slice($toChars, 0, $pairCount)
    );

    return \strtr($str, $replacePairs);
  }
9048
  /**
   * Return the width of a string.
   *
   * Uses \mb_strwidth() if mbstring is available. Otherwise every character matched by the
   * character class below counts as two columns and every remaining character as one.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ('' === $str) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // first choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    // vanilla php: the regex below needs UTF-8 input
    if ('UTF-8' !== $encoding) {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip the double-width characters and let preg_replace() count them
    $wideCount = 0;
    $narrowChars = (string)preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    return ($wideCount * 2) + self::strlen($narrowChars, 'UTF-8');
  }
9098
9099
  /**
9100
   * Get part of a string.
9101
   *
9102
   * @link http://php.net/manual/en/function.mb-substr.php
9103
   *
9104
   * @param string $str       <p>The string being checked.</p>
9105
   * @param int    $offset    <p>The first position used in str.</p>
9106
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9107
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9108
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9109
   *
9110
   * @return string|false
9111
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9112
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9113
   *                      characters long, <b>FALSE</b> will be returned.
9114
   */
9115 394
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9116
  {
9117 394
    if ('' === $str) {
9118 19
      return '';
9119
    }
9120
9121
    // Empty string
9122 388
    if ($length === 0) {
9123 14
      return '';
9124
    }
9125
9126 385
    if ($cleanUtf8 === true) {
9127
      // iconv and mbstring are not tolerant to invalid encoding
9128
      // further, their behaviour is inconsistent with that of PHP's substr
9129 2
      $str = self::clean($str);
9130
    }
9131
9132
    // Whole string
9133 385
    if (!$offset && $length === null) {
9134 40
      return $str;
9135
    }
9136
9137 356
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9138 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9139
    }
9140
9141 356
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9142
      self::checkForSupport();
9143
    }
9144
9145
    //
9146
    // fallback for binary || ascii only
9147
    //
9148
9149
    if (
9150 356
        $encoding === 'CP850'
9151
        ||
9152 356
        $encoding === 'ASCII'
9153
    ) {
9154 5
      return self::substr_in_byte($str, $offset, $length);
9155
    }
9156
9157
    //
9158
    // fallback via mbstring
9159
    //
9160
9161 351
    if (self::$SUPPORT['mbstring'] === true) {
9162 351
      $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9163 351
      if ($return !== false) {
9164 351
        return $return;
9165
      }
9166
    }
9167
9168
    // otherwise we need the string-length and can't fake it via "2147483647"
9169 4
    $str_length = 0;
9170 4
    if ($offset || $length === null) {
9171 4
      $str_length = self::strlen($str, $encoding);
9172
    }
9173
9174
    // e.g.: invalid chars + mbstring not installed
9175 4
    if ($str_length === false) {
9176
      return false;
9177
    }
9178
9179
    // Empty string
9180 4
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9181
      return '';
9182
    }
9183
9184
    // Impossible
9185 4
    if ($offset && $offset > $str_length) {
9186
      // "false" is the php native return type here,
9187
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9188
      return '';
9189
9190
    }
9191
9192 4
    if ($length === null) {
9193 4
      $length = (int)$str_length;
9194
    } else {
9195 2
      $length = (int)$length;
9196
    }
9197
9198
    if (
9199 4
        $encoding !== 'UTF-8'
9200
        &&
9201 4
        self::$SUPPORT['mbstring'] === false
9202
    ) {
9203 2
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9204
    }
9205
9206
    //
9207
    // fallback via intl
9208
    //
9209
9210
    if (
9211 4
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9212
        &&
9213 4
        $offset >= 0 // grapheme_substr() can't handle negative offset
9214
        &&
9215 4
        self::$SUPPORT['intl'] === true
9216
    ) {
9217
      $returnTmp = \grapheme_substr($str, $offset, $length);
9218
      if ($returnTmp !== false) {
9219
        return $returnTmp;
9220
      }
9221
    }
9222
9223
    //
9224
    // fallback via iconv
9225
    //
9226
9227
    if (
9228 4
        $length >= 0 // "iconv_substr()" can't handle negative length
9229
        &&
9230 4
        self::$SUPPORT['iconv'] === true
9231
    ) {
9232
      $returnTmp = \iconv_substr($str, $offset, $length);
9233
      if ($returnTmp !== false) {
9234
        return $returnTmp;
9235
      }
9236
    }
9237
9238
    //
9239
    // fallback for ascii only
9240
    //
9241
9242 4
    if (self::is_ascii($str)) {
9243
      return \substr($str, $offset, $length);
9244
    }
9245
9246
    //
9247
    // fallback via vanilla php
9248
    //
9249
9250
    // split to array, and remove invalid characters
9251 4
    $array = self::split($str);
9252
9253
    // extract relevant part, and join to make sting again
9254 4
    return \implode('', \array_slice($array, $offset, $length));
9255
  }
9256
9257
  /**
   * Compare two strings from an offset, up to a given number of characters.
   *
   * If an offset or a length is given, $str1 is first cut via "UTF8::substr()" and $str2 is
   * then shortened to the character length of that cut; afterwards both strings are compared
   * via "UTF8::strcmp()" or "UTF8::strcasecmp()".
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, handed to
   *                                     "UTF8::substr()".</p>
   * @param int|null $length             [optional] <p>The length of the comparison, handed to
   *                                     "UTF8::substr()". Null means: no limit.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if ($offset !== 0 || $length !== null) {
      $str1Tmp = self::substr($str1, $offset, $length);
      $str1 = ($str1Tmp === false) ? '' : (string)$str1Tmp;

      // "strlen()" can return false, but "substr()" only accepts an integer (or null) as length,
      // so $str2 is only shortened when the length of $str1 could be determined
      $str1Length = self::strlen($str1);
      if ($str1Length !== false) {
        $str2Tmp = self::substr($str2, 0, (int)$str1Length);
        $str2 = ($str2Tmp === false) ? '' : (string)$str2Tmp;
      }
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9300
9301
  /**
   * Count the number of substring occurrences.
   *
   * If an offset or a length is given, the haystack is first cut via "UTF8::substr()" and the
   * occurrences are counted in the remaining part.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. Null means: up to the end of the haystack.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The number of occurrences, or false if the haystack or the needle is empty,
   *                   if the length of the haystack could not be determined, or if the
   *                   offset + length combination is rejected (see the PHP 7.1 note in the code).
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // normalize the encoding first, so that every helper below works with the same charset
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the requested encoding, not in the default one
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    if ($cleanUtf8 === true) {
      // invalid characters would otherwise distort the matching in the "mb_" / "preg_" functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via PCRE: "preg_match_all()" already returns the number of full matches
    // (or false on error, which is reported as 0 here)
    return (int)\preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack);
  }
9394
9395
  /**
   * Count the number of substring occurrences, working on bytes.
   *
   * With "mbstring.func_overload" active, the haystack is cut via "UTF8::substr_in_byte()" and
   * counted via "mb_substr_count()" in an 8-bit charset; otherwise the native "substr_count()"
   * is used directly.
   *
   * @param string   $haystack <p>
   *                           The string being checked.
   *                           </p>
   * @param string   $needle   <p>
   *                           The string being found.
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. Null means: up to the end of the haystack.
   *                           </p>
   *
   * @return int|false
   *                   The number of times the needle substring occurs in the haystack string,
   *                   <strong>0</strong> if the haystack or the needle is empty.
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {

      if ($offset || $length !== null) {

        if ($length === null) {
          $lengthTmp = self::strlen($haystack);
          if ($lengthTmp === false) {
            return false;
          }
          $length = (int)$lengthTmp;
        }

        if (
            $length !== 0
            &&
            $offset !== 0
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
        $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
      }

      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // never hand "null" to the native function: before PHP 8 the length parameter
    // only accepts an integer, so the argument is simply omitted when it is not set
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9469
9470
  /**
   * Count how often $substring occurs in $str.
   *
   * The search is case-sensitive by default; pass false as $caseSensitive to case-fold
   * both strings (via "UTF8::strtocasefold()") before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, <strong>0</strong> if one of the strings is empty.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $str || '' === $substring) {
      return 0;
    }

    // only a fallback to prevent BC in the api ...
    // a non-boolean third argument is treated as the encoding
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    if (!$caseSensitive) {
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9500
9501
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack if it does not start
   *                with the needle or if the length of the needle could not be determined.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // "strlen()" can return false, but "substr()" only accepts an integer offset
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, (int)$needleLength);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    return $haystack;
  }
9529
9530
  /**
   * Get a part of a string, where offset and length are counted in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string. Null means: up to the
   *                         end of the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters, as returned by "mb_substr()" (8-bit charset,
   *                      if "mbstring.func_overload" is active) or by the native "substr()".
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to extract: empty input or explicit zero length
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no limit: the whole string is requested
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "null" is replaced by a very large length, so that the rest of the string is returned
    $maxLength = $length ?? 2147483647;

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \substr($str, $offset, $maxLength);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
  }
9569
9570
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the suffix, or the unchanged haystack if it does not end
   *                with the needle or if one of the string lengths could not be determined.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) === true) {
      // "strlen()" can return false; do not calculate with it, otherwise a wrong length is used
      $haystackLength = self::strlen($haystack);
      $needleLength = self::strlen($needle);
      if ($haystackLength === false || $needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, 0, $haystackLength - $needleLength);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    return $haystack;
  }
9598
9599
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack if it does not start
   *                with the needle or if the length of the needle could not be determined.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // "strlen()" can return false, but "substr()" only accepts an integer offset
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, (int)$needleLength);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    return $haystack;
  }
9627
9628
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the replacement, offset and length are expanded to one entry per input
   * string and this method is applied to every entry. In that array mode a missing (null) length
   * is turned into 0 for every entry, and non-integer offsets are turned into 0.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings. For a single
   *                                          input string only the first array entry is used.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single input string, it
   *                                          defaults to the length of the string. If length is zero, the
   *                                          replacement is inserted into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: one entry per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: one entry per input string, non-integer entries become 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: one entry per input string
      if (null === $length) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if (null === $value) {
                return 0;
              }

              // non-integer entries fall back to the number of input strings
              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string can only use one replacement
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // pure ASCII input can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the replacement into the input
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $lengthTmp = self::strlen($str);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
9734
9735
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the suffix, or the unchanged haystack if it does not end
   *                with the needle or if one of the string lengths could not be determined.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) === true) {
      // "strlen()" can return false; do not calculate with it, otherwise a wrong length is used
      $haystackLength = self::strlen($haystack);
      $needleLength = self::strlen($needle);
      if ($haystackLength === false || $needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, 0, $haystackLength - $needleLength);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    return $haystack;
  }
9763
9764
  /**
   * Returns a case swapped version of the string.
   *
   * The result is built by XOR-ing the lower-cased string, the upper-cased string and the
   * input string byte by byte.
   * NOTE(review): this presumably relies on both case conversions keeping the byte length
   * of the input - confirm for characters whose case mapping changes the length.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the case conversions are applied
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    return (string)($lower ^ $upper ^ $str);
  }
9791
9792
  /**
   * Checks whether "mb_strlen()" or "iconv()" exist although the matching PHP extension
   * ("mbstring" / "iconv") is not loaded, i.e. whether the function is provided by
   * something else, presumably a polyfill such as "symfony/polyfill".
   *
   * @return bool
   *              <strong>true</strong> if at least one of the two functions exists without its
   *              extension, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringIsPolyfilled || $iconvIsPolyfilled;
  }
9815
9816
  /**
   * Replace every tab character in the string with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string The string with all tabs replaced.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9826
9827
  /**
   * Convert the string to title case via "mb_convert_case()" with "MB_CASE_TITLE".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9845
9846
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Passed to "UTF8::to_ascii()" as second argument.</p>
   * @param bool   $strict    [optional] <p>Passed to "UTF8::to_ascii()" as third argument.</p>
   *
   * @return string The result of "UTF8::to_ascii()".
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9863
9864
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_iso8859()".
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9879
9880
  /**
   * Alias of "UTF8::to_latin1()": the argument is passed through unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_latin1()".
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9895
9896
  /**
   * Alias of "UTF8::to_utf8()": the argument is passed through unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_utf8()".
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9911
9912
  /**
9913
   * Convert a string into ASCII.
9914
   *
9915
   * @param string $str     <p>The input string.</p>
9916
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9917
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9918
   *                        performance</p>
9919
   *
9920
   * @return string
9921
   */
9922 37
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9923
  {
9924 37
    static $UTF8_TO_ASCII;
9925
9926 37
    if ('' === $str) {
9927 3
      return '';
9928
    }
9929
9930
    // check if we only have ASCII, first (better performance)
9931 34
    if (self::is_ascii($str) === true) {
9932 6
      return $str;
9933
    }
9934
9935 29
    $str = self::clean(
9936 29
        $str,
9937 29
        true,
9938 29
        true,
9939 29
        true,
9940 29
        false,
9941 29
        true,
9942 29
        true
9943
    );
9944
9945
    // check again, if we only have ASCII, now ...
9946 29
    if (self::is_ascii($str) === true) {
9947 12
      return $str;
9948
    }
9949
9950 18
    if ($strict === true) {
9951
9952 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9953
        self::checkForSupport();
9954
      }
9955
9956 1
      if (self::$SUPPORT['intl'] === true) {
9957
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
9958
        /** @noinspection PhpComposerExtensionStubsInspection */
9959 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
9960
9961
        // check again, if we only have ASCII, now ...
9962 1
        if (self::is_ascii($str) === true) {
9963 1
          return $str;
9964
        }
9965
9966
      }
9967
    }
9968
9969 18
    if (self::$ORD === null) {
9970
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9971
    }
9972
9973 18
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
9974 18
    $chars = $ar[0];
9975 18
    $ord = null;
9976 18
    foreach ($chars as &$c) {
9977
9978 18
      $ordC0 = self::$ORD[$c[0]];
9979
9980 18
      if ($ordC0 >= 0 && $ordC0 <= 127) {
9981 14
        continue;
9982
      }
9983
9984 18
      $ordC1 = self::$ORD[$c[1]];
9985
9986
      // ASCII - next please
9987 18
      if ($ordC0 >= 192 && $ordC0 <= 223) {
9988 16
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
9989
      }
9990
9991 18
      if ($ordC0 >= 224) {
9992 7
        $ordC2 = self::$ORD[$c[2]];
9993
9994 7
        if ($ordC0 <= 239) {
9995 6
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
9996
        }
9997
9998 7
        if ($ordC0 >= 240) {
9999 2
          $ordC3 = self::$ORD[$c[3]];
10000
10001 2
          if ($ordC0 <= 247) {
10002 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
10003
          }
10004
10005 2
          if ($ordC0 >= 248) {
10006
            $ordC4 = self::$ORD[$c[4]];
10007
10008
            if ($ordC0 <= 251) {
10009
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
10010
            }
10011
10012
            if ($ordC0 >= 252) {
10013
              $ordC5 = self::$ORD[$c[5]];
10014
10015
              if ($ordC0 <= 253) {
10016
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10017
              }
10018
            }
10019
          }
10020
        }
10021
      }
10022
10023 18
      if ($ordC0 === 254 || $ordC0 === 255) {
10024
        $c = $unknown;
10025
        continue;
10026
      }
10027
10028 18
      if ($ord === null) {
10029
        $c = $unknown;
10030
        continue;
10031
      }
10032
10033 18
      $bank = $ord >> 8;
10034 18
      if (!isset($UTF8_TO_ASCII[$bank])) {
10035 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10036 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10037 2
          $UTF8_TO_ASCII[$bank] = [];
10038
        }
10039
      }
10040
10041 18
      $newchar = $ord & 255;
10042
10043 18
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10044
10045
        // keep for debugging
10046
        /*
10047
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10048
        echo "char: " . $c . "\n";
10049
        echo "ord: " . $ord . "\n";
10050
        echo "newchar: " . $newchar . "\n";
10051
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10052
        echo "bank:" . $bank . "\n\n";
10053
        */
10054
10055 17
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10056
      } else {
10057
10058
        // keep for debugging missing chars
10059
        /*
10060
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10061
        echo "char: " . $c . "\n";
10062
        echo "ord: " . $ord . "\n";
10063
        echo "newchar: " . $newchar . "\n";
10064
        echo "bank:" . $bank . "\n\n";
10065
        */
10066
10067 18
        $c = $unknown;
10068
      }
10069
    }
10070
10071 18
    return \implode('', $chars);
10072
  }
10073
10074
  /**
   * Interpret a value as a boolean.
   *
   * The value is cast to a string first. An empty string is false. The case-insensitive
   * keywords "true", "1", "on", "yes" give true and "false", "0", "off", "no" give false.
   * Any other numeric string is true only when it is greater than zero. Everything else
   * is decided by the truthiness of the string after "UTF8::trim()".
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;
    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keyword = \strtolower($value);

    if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    // remaining numeric strings: only strictly positive numbers count as true
    if (\is_numeric($value)) {
      return (float)$value > 0;
    }

    return (bool)self::trim($value);
  }
10113
10114
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * An array is converted element by element (recursively); its keys are kept.
   * Anything else is cast to a string; an empty string is returned unchanged,
   * every other string is passed through "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($element) {
            return self::to_iso8859($element);
          },
          $str
      );
    }

    $input = (string)$str;

    return $input === '' ? '' : self::utf8_decode($input);
  }
10138
10139
  /**
   * Convert a string (or array of strings) into Latin-1.
   *
   * This is only another name for "UTF8::to_iso8859()"; the argument is forwarded as-is.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
10152
10153
  /**
   * Convert almost any non-UTF-8 input into UTF-8, while leaving existing UTF-8 sequences alone.
   *
   * <ul>
   * <li>Bytes that already form a well-shaped 2-, 3- or 4-byte UTF-8 sequence are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "UTF8::to_utf8_convert_helper()", i.e. the input is
   * assumed to be WINDOWS-1252 or ISO-8859.</li>
   * <li>Unicode escape sequences like "\u00e4" are decoded afterwards.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()"
   * for this case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, keys are kept
    if (\is_array($str) === true) {
      return \array_map(
          static function ($element) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($element, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      // 7-bit byte: it doesn't need conversion
      if ($lead < "\x80") {
        $out .= $lead;
        ++$pos;

        continue;
      }

      // how many continuation bytes does the lead byte announce? (0 = not a usable lead byte)
      if ($lead >= "\xC0" && $lead <= "\xDF") {
        $expected = 1;
      } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
        $expected = 2;
      } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
        $expected = 3;
      } else {
        $expected = 0;
      }

      // collect the continuation bytes; a missing byte counts as "\x00" and therefore fails the check
      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0;
      for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; ++$offset) {
        $next = $pos + $offset < $byteCount ? $str[$pos + $offset] : "\x00";

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $pos += $expected + 1;
      } else {
        // not valid UTF8 - convert only the lead byte and re-examine the following bytes
        $out .= self::to_utf8_convert_helper($lead);
        ++$pos;
      }
    }

    // decode unicode escape sequences
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
10262
10263
  /**
   * Convert one byte that is not part of a UTF-8 sequence into UTF-8.
   *
   * The byte's ordinal is looked up in the Windows-1252 table first. If there is no entry,
   * the byte value itself is encoded as a two-byte UTF-8 sequence.
   *
   * @param int|string $input <p>The single byte to convert.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Integer shift instead of "/ 64": a division yields a float for most values, and using a
    // fractional float as array key is an implicit, lossy conversion (deprecated since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    // bitwise string operators are intended here: keep the low 6 bits and set the continuation marker
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10296
10297
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without $chars (INF, or any empty value such as '' / null) all characters of the Unicode
   * categories "Z" (separators) and "C" (other / control) are stripped. The character list "0"
   * is honoured and strips zeros, although the string "0" is falsy in PHP.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy, but it is a valid character list and must not fall back to the default pattern.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10326
10327
  /**
   * Makes string's first char uppercase; the rest of the string is left untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // clean first, so that the character positions used below are reliable
      $str = self::clean($str);
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    // "UTF8::substr()" may return false (e.g. nothing left after the first char)
    $remainder = self::substr($str, 1, null, $encoding);
    if ($remainder === false) {
      $remainder = '';
    }

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
10357
10358
  /**
   * Uppercase the first char of the string.
   *
   * This is only another name for "UTF8::ucfirst()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10373
10374
  /**
   * Uppercase for all words in the string.
   *
   * Plain ASCII input without exceptions and without a charlist is handed to PHP's native
   * "ucwords()". Otherwise the string is split via "UTF8::str_to_words()", every part that is
   * not listed in $exceptions gets its first char uppercased, and the parts are joined again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare with '' explicitly: the string "0" is falsy, but it is real input and must be kept
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // only when neither a charlist nor any exception text is given ("0" counts as given)
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts, but never drop the word "0"
      if (!$word && '0' !== $word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10442
10443
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each pass runs: to_utf8() -> html_entity_decode() -> urldecode() -> fix_simple_utf8().
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite non-standard "%uXXXX" escapes into hex html-entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    while (true) {
      $before = $str;

      $asUtf8 = self::to_utf8($before);
      $withoutEntities = self::html_entity_decode($asUtf8, ENT_QUOTES | ENT_HTML5);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // stop after one pass, or as soon as another pass changes nothing
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return $str;
  }
10491
10492
  /**
   * Return a array with "urlencoded"-win1252 -> UTF-8
   *
   * The table covers "%20" up to "%FF" (224 entries). The entries for %7F, %81, %8D, %8F,
   * %90, %9D, %A0 and %AD are empty strings. "%80" maps to the euro sign, which is what
   * byte 0x80 means in Windows-1252.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the euro sign in Windows-1252 (the backtick is already covered by %60)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
10728
10729
  /**
10730
   * Decodes an UTF-8 string to ISO-8859-1.
10731
   *
10732
   * @param string $str <p>The input string.</p>
10733
   * @param bool   $keepUtf8Chars
10734
   *
10735
   * @return string
10736
   */
10737 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10738
  {
10739 14
    if ('' === $str) {
10740 5
      return '';
10741
    }
10742
10743 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10744 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10745
10746 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10747
10748 1
      if (self::$WIN1252_TO_UTF8 === null) {
10749
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10750
      }
10751
10752 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10752
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10753 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10753
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10754
    }
10755
10756
    /** @noinspection PhpInternalEntityUsedInspection */
10757 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10758
10759 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10760
      self::checkForSupport();
10761
    }
10762
10763
    // save for later comparision
10764 14
    $str_backup = $str;
10765 14
    $len = self::strlen_in_byte($str);
10766
10767 14
    if (self::$ORD === null) {
10768
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10769
    }
10770
10771 14
    if (self::$CHR === null) {
10772
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10773
    }
10774
10775 14
    $noCharFound = '?';
10776
    /** @noinspection ForeachInvariantsInspection */
10777 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10778 14
      switch ($str[$i] & "\xF0") {
10779 14
        case "\xC0":
10780 12
        case "\xD0":
10781 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10782 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10783 14
          break;
10784
10785
        /** @noinspection PhpMissingBreakStatementInspection */
10786 12
        case "\xF0":
10787
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10788 12
        case "\xE0":
10789 10
          $str[$j] = $noCharFound;
10790 10
          $i += 2;
10791 10
          break;
10792
10793
        default:
10794 12
          $str[$j] = $str[$i];
10795
      }
10796
    }
10797
10798 14
    $return = self::substr_in_byte($str, 0, $j);
10799 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10800
      $return = '';
10801
    }
10802
10803
    if (
10804 14
        $keepUtf8Chars === true
10805
        &&
10806 14
        self::strlen($return) >= self::strlen($str_backup)
10807
    ) {
10808 2
      return $str_backup;
10809
    }
10810
10811 14
    return $return;
10812
  }
10813
10814
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>The string is first passed through PHP's utf8_encode(). If the result contains a "\xC2" byte,
   * the "win1252_to_utf8" replacement table is applied on top of it.</p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfill implementation may hand back false instead of a string
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($encoded === false) {
      return '';
    }

    // without a "\xC2" byte the replacement table is not consulted at all
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are derived from the table once and kept for later calls
    static $search = null;
    static $replace = null;

    if ($search === null) {
      // load the mapping table on first use
      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $search = \array_keys(self::$WIN1252_TO_UTF8);
      $replace = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($search, $replace, $encoded);
  }
10854
10855
  /**
   * Fixes utf8-win1252 chars by delegating to UTF8::fix_simple_utf8().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of UTF8::fix_simple_utf8() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // kept only for backward compatibility; all work happens in the replacement method
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10868
10869
  /**
   * Returns the table of utf8 whitespace characters.
   *
   * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return string[]
   *                 An array with all known whitespace characters as values and the type of whitespace as keys,
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10884
10885
  /**
   * Limits the number of words in a string.
   *
   * <p>Words are runs of non-whitespace characters. If the string has more than $limit words, the
   * first $limit words are kept, trailing whitespace is trimmed and $strAddOn is appended.
   * Otherwise the string is returned unchanged.</p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>An empty string if $str is empty or $limit is smaller than 1.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // nothing to return for an empty string or a non-positive limit
    if ($str === '' || $limit < 1) {
      return '';
    }

    // leading whitespace, then between one and $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $found);

    $head = $found[0] ?? null;

    // no match, or the match already covers the whole string: nothing was cut off
    if ($head === null || self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
10916
10917
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>The string is split into single characters via UTF8::split() and mirrored into a mask that has
   * exactly one ASCII character per element. PHP's wordwrap() is run on that mask, and the break
   * positions it produces are then replayed onto the real characters.</p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column, or an empty string if $str or
   *                $break is empty.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = \explode($break, $str);
    $segmentCount = $segments === false ? 0 : \count($segments);

    // $mask mirrors $chars one-to-one: '#' = existing break, ' ' = space, '?' = any other character
    $mask = '';
    $chars = [];

    /** @noinspection ForeachInvariantsInspection */
    for ($segmentIndex = 0; $segmentIndex < $segmentCount; ++$segmentIndex) {

      // every segment but the first was preceded by a break in the input
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$segmentIndex];
      unset($segments[$segmentIndex]);

      if ($segment === null) {
        continue;
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go, working on the mask
    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {
      // copy every character that sits in front of this break marker
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // a marker that stands for an original break or a space consumes that character
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left follows the last break
    return $wrapped . \implode('', $chars);
  }
10987
10988
  /**
   * Line-wraps the string after $limit, but also after the next word.
   *
   * <p>The input is split on "\r\n", "\r" and "\n". Every line is wrapped with UTF8::wordwrap()
   * (default break "\n", no cutting of long words) and written back followed by "\n", so the
   * result always ends with a line break when at least one line was processed.</p>
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width handed to UTF8::wordwrap() for every line.</p>
   *
   * @return string <p>The wrapped lines, or an empty string if the input could not be split.</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);

    // preg_split() signals an error with false; there is nothing to wrap then
    if ($lines === false) {
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // Use the class's own wordwrap(): the unqualified call used before fell back to PHP's
      // global, byte-based wordwrap(), which wraps multibyte text at the wrong width.
      $wrapped .= self::wordwrap($line, $limit) . "\n";
    }

    return $wrapped;
  }
11012
11013
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return string[] <p>An array with the numeric code point as key and the White Space character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11022
11023
11024
}
11025