Passed
Push — master ( fa6c8b...917cec )
by Lars
03:34
created

UTF8::strwidth()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 14
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 5
nc 4
nop 3
dl 0
loc 14
ccs 6
cts 6
cp 1
crap 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Instantiating the helper runs the environment detection
   * (see UTF8::checkForSupport()).
   */
  public function __construct()
  {
    // The detection itself is guarded, so repeated instantiation is cheap.
    self::checkForSupport();
  }
212
213
  /**
   * Array-style character access for UTF-8 strings (like $str[1], but per character).
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the wanted character.</p>
   *
   * @return string The single (multi-byte) character, or an empty string when
   *                $str is empty or $pos is negative.
   */
  public static function access(string $str, int $pos): string
  {
    // Empty input and negative positions both yield an empty result.
    if ($pos < 0 || '' === $str) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Makes sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string that already carries a BOM is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string, prefixed with the UTF-8 BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    // Only an explicit "false" from the BOM check triggers the prepend.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The work is delegated to UTF8::str_pad(): the target length is the current
   * length plus the requested padding, and the pad type is chosen from which
   * sides were requested.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $strlen = self::strlen($str, $encoding);

    // A side that was not requested is 0 and therefore adds nothing, so one
    // formula covers every combination of $left / $right.
    $length = ($left + $right) + $strlen;

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
   * Returns a copy of the array whose keys were converted to lower or upper case.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other
   *                     value is treated as <strong>CASE_LOWER</strong>.</p>
   *
   * @return array The original values, stored under the case-converted keys.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Everything that is not CASE_UPPER falls back to lower-casing.
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $origKey => $item) {
      $newKey = $toUpper ? self::strtoupper($origKey) : self::strtolower($origKey);

      $converted[$newKey] = $item;
    }

    return $converted;
  }
317
318
  /**
   * Extracts the text enclosed by two delimiters.
   *
   * The search for $start begins at $offset; the search for $end begins right
   * behind the found $start. An empty string is returned when either delimiter
   * is missing or when nothing lies between them.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // first character behind the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $found = self::substr($str, $from, $endPos - $from, $encoding);

    return $found === false ? '' : $found;
  }
356
357
  /**
   * Convert a binary representation ("0"/"1" digits) into a string.
   *
   * The digits are converted to hexadecimal via base_convert() and then packed
   * into bytes. An empty input, or an input whose value is zero, yields an
   * empty string.
   *
   * @param mixed $bin 1|0
   *
   * @return string
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $convert = \base_convert($bin, 2, 16);
    if ($convert === '0') {
      return '';
    }

    // pack('H*') fills a missing low nibble on the RIGHT, so an odd-length hex
    // string such as "1" would become "\x10". Left-pad it to keep the value.
    if (\strlen($convert) % 2 !== 0) {
      $convert = '0' . $convert;
    }

    return \pack('H*', $convert);
  }
377
378
  /**
   * Provides the three-byte UTF-8 Byte Order Mark.
   *
   * INFO: the UTF-16 and UTF-32 BOM values are listed in the private
   *       UTF8::$BOM table.
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
389
390
  /**
   * Alias of UTF8::chr_map(): forwards both arguments unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
404
405
  /**
   * Picks one character out of a string by its zero-based index.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // a one-character substring; the result is cast to string
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
418
419
  /**
   * Breaks a string up into its individual characters.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    // splitting with a chunk length of 1 gives one entry per character
    $chars = self::str_split($str, 1);

    return $chars;
  }
430
431
  /**
   * Detects once which UTF-8 related extensions and features the server offers
   * and stores the result in UTF8::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection has already been done
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    // the transliterator id list is only fetched when intl provides the function
    if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype: http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo: http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json: http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // pcre: http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
480
481
  /**
   * Builds the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding". Code points up to 127 are
   * read from the lookup table loaded via getData('chr'); larger ones are
   * produced by IntlChar when it is available, otherwise the UTF-8 bytes are
   * assembled from the same lookup table.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // per-request memo of already generated characters
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // without mbstring only these three encodings can be produced
    $handledWithoutMbstring = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if (!$handledWithoutMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // use "simple"-char only until "\x80"
    if ($code_point <= 127) {
      if (self::$CHR === null && ($table = self::getData('chr'))) {
        self::$CHR = (array)$table;
      }

      $chr = self::$CHR[$code_point];
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }

      $CHAR_CACHE[$cacheKey] = $chr;

      return $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }

      $CHAR_CACHE[$cacheKey] = $chr;

      return $chr;
    }

    // fallback: assemble the UTF-8 byte sequence by hand
    if (self::$CHR === null && ($table = self::getData('chr'))) {
      self::$CHR = (array)$table;
    }

    $code_point = (int)$code_point;
    if ($code_point <= 0x7F) {
      $chr = self::$CHR[$code_point];
    } else {
      // lead byte + bit-shifts of the 6-bit groups that follow it
      if ($code_point <= 0x7FF) {
        $leadByte = ($code_point >> 6) + 0xC0;
        $shifts = [0];
      } elseif ($code_point <= 0xFFFF) {
        $leadByte = ($code_point >> 12) + 0xE0;
        $shifts = [6, 0];
      } else {
        $leadByte = ($code_point >> 18) + 0xF0;
        $shifts = [12, 6, 0];
      }

      $chr = self::$CHR[$leadByte];
      foreach ($shifts as $shift) {
        // continuation byte: 10xxxxxx
        $chr .= self::$CHR[(($code_point >> $shift) & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    $CHAR_CACHE[$cacheKey] = $chr;

    return $chr;
  }
580
581
  /**
   * Runs a callback over every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    // one array entry per character, each passed through the callback
    return \array_map($callback, self::split($str));
  }
595
596
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character
   *               (empty for an empty input string).
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    // self::$SUPPORT is read below, so make sure the detection has run
    // (same guard as used by UTF8::chr()).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // with "mbstring.func_overload" the byte length is taken via
      // UTF8::strlen_in_byte() instead of the native strlen()
      return \array_map(
          static function ($data) {
            return UTF8::strlen_in_byte($data);
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
627
628
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes belong to the character (1 to 4);
   * the payload bits of the lead byte and of each following byte are then
   * combined into one integer.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    // "??" reads a missing offset as '' without raising an
    // "Uninitialized string offset" notice on empty input.
    $code = self::ord($char[0] ?? '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx (a truncated sequence is read as '' instead of raising a notice)
      $code = ($code << 6) + (self::ord($char[$i - 1] ?? '') & ~0x80);
    }

    return $code;
  }
666
667
  /**
   * Formats the code point of a character in hexadecimal notation (U+xxxx).
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional] <p>Prefix handed on to UTF8::int_to_hex(). Default: 'U+'</p>
   *
   * @return string The code point encoded as U+xxxx, or an empty string for an
   *                empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ('' === $char) {
      return '';
    }

    // the entity "&#0;" is looked up as an empty string
    $lookup = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($lookup);

    return self::int_to_hex($codePoint, $pfix);
  }
687
688
  /**
   * Alias of UTF8::chr_to_decimal(): forwards the character unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
701
702
  /**
   * Cuts a string into chunks and joins them with a line ending.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) placed between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    // $end is used as glue, i.e. it appears between chunks only
    return \implode($end, $chunks);
  }
715
716
  /**
   * Strips every byte that is not part of a well-formed UTF-8 sequence and
   * optionally applies further normalization steps.
   *
   * Order of the optional steps: diamond question mark, invisible characters,
   * whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
   *                                              invisible characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; the two other groups
    // match stray bytes, which are dropped by replacing with '$1' only.
    $validUtf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($validUtf8Pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
784
785
  /**
   * Repairs simple encoding errors and then runs UTF8::clean() with a fixed
   * set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $str = (string)$str;
    if ('' === $str) {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // remove all non-UTF-8 symbols and additionally:
    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (no MS Word normalization)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
821
822
  /**
   * Maps a string (or an array of strings) onto Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string argument is split into characters first; an array argument is
   * used as given. Every entry is then passed through UTF8::ord().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $chars = (\is_string($arg) === true) ? self::split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $chars);
    if (\count($points) === 0) {
      return [];
    }

    if (!$u_style) {
      return $points;
    }

    // convert the integers into the "U+xxxx" notation
    return \array_map([self::class, 'int_to_hex'], $points);
  }
866
867
  /**
   * Replaces every run of "[[:space:]]" characters with one ordinary space and
   * trims the result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
882
883
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // one entry per character, then tally equal entries
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
896
897
  /**
   * Strips "@media ... { ... }" blocks out of a CSS string.
   *
   * @param string $str
   *
   * @return string The CSS without the matched media-query blocks.
   */
  public static function css_stripe_media_queries(string $str): string
  {
    // modifiers: m = multiline, i = case-insensitive, s = dot matches newline, U = ungreedy
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
912
913
  /**
   * Reports whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
923
924
  /**
   * Turns a decimal code point into a character by decoding the matching
   * numeric HTML entity ("&#<int>;").
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
935
936
  /**
937
   * Encode a string with a new charset-encoding.
938
   *
939
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
940
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
941
   *
942
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
943
   * @param string $str      <p>The input string</p>
944
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
945
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
946
   *
947
   * @return string
948
   */
949 30
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Nothing to convert, or no target encoding given.
    if ('' === $str || '' === $encoding) {
      return $str;
    }

    // The two most common targets skip the normalization step.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Without $force, convert only when a source encoding was detected
    // and it differs from the requested target encoding.
    if (
        $force !== true
        &&
        (
            $encodingDetected === false
            ||
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    // UTF-8 / WINDOWS-1252 / ISO-8859-1 sources (or a forced conversion)
    // are handled by the class's own converters.
    $useOwnConverter = $force === true
                       || $encodingDetected === 'UTF-8'
                       || $encodingDetected === 'WINDOWS-1252'
                       || $encodingDetected === 'ISO-8859-1';

    if ($encoding === 'UTF-8' && $useOwnConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useOwnConverter) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    //
    // When $force is false, $encodingDetected cannot be false here:
    // that case already returned in the guard above.
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        ($force === true ? $encoding : $encodingDetected)
    );

    // Compare explicitly instead of relying on truthiness: a converted
    // result of "0" is valid and must not be discarded in favour of the
    // unconverted input.
    if ($strEncoded !== false && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1030
1031
  /**
1032
   * Creates an extract from a sentence; if the search-string is found, the extract tries to center it in the output.
1033
   *
1034
   * @param string   $str                    <p>The input string.</p>
1035
   * @param string   $search                 <p>The searched string.</p>
1036
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1037
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1038
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1039
   *
1040
   * @return string
1041
   */
1042 1
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters stripped from the cut edges of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      // Default: half of the text length.
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // Strict comparison on purpose: empty() would also treat the valid
    // search term "0" as "no search given".
    if ('' === $search) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      // Cut at the first space or period at/after $end.
      // NOTE(review): if either lookup reports "not found" as false, min()
      // picks false and the cast yields 0, so the whole text is returned
      // below -- confirm that this is the intended behavior.
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    // Start offset that would put the search term in the middle of a $length wide window.
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        // Move the start back to the last space / period before the window.
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    // A term that was not found (false) or that sits at offset 0 is
    // extracted from the start of the text (else-branch).
    if ($wordPos !== false && $wordPos > 0 && $halfSide > 0) {
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // No usable end position: take everything from $pos_start onwards.
        // The text length is only an upper bound for substr(); it is now
        // measured with $encoding like every other length in this method.
        $strSub = self::substr($str, $pos_start, self::strlen($str, $encoding), $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1152
1153
  /**
1154
   * Reads entire file into a string.
1155
   *
1156
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1157
   *
1158
   * @link http://php.net/manual/en/function.file-get-contents.php
1159
   *
1160
   * @param string        $filename         <p>
1161
   *                                        Name of the file to read.
1162
   *                                        </p>
1163
   * @param bool          $use_include_path [optional] <p>
1164
   *                                        Prior to PHP 5, this parameter is called
1165
   *                                        use_include_path and is a bool.
1166
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1167
   *                                        to trigger include path
1168
   *                                        search.
1169
   *                                        </p>
1170
   * @param resource|null $context          [optional] <p>
1171
   *                                        A valid context resource created with
1172
   *                                        stream_context_create. If you don't need to use a
1173
   *                                        custom context, you can skip this parameter by &null;.
1174
   *                                        </p>
1175
   * @param int|null      $offset           [optional] <p>
1176
   *                                        The offset where the reading starts.
1177
   *                                        </p>
1178
   * @param int|null      $maxLength        [optional] <p>
1179
   *                                        Maximum length of data read. The default is to read until end
1180
   *                                        of file is reached.
1181
   *                                        </p>
1182
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1183
   *
1184
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1185
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1186
   *
1187
   * @return string|false The function returns the read data or false on failure.
1188
   */
1189 11
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // init
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
    // rewrites quotes / tag-like parts of the name -- confirm that this
    // sanitizing is still wanted for file names.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failed filter as false; passing that on to
    // \file_get_contents() would raise a TypeError under strict_types,
    // so report it as the documented failure value instead.
    if ($filename === false) {
      return false;
    }

    // Apply the timeout only when the caller did not supply a stream context.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument must be left out entirely to read until EOF.
    if ($maxLength !== null) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1244
1245
  /**
1246
   * Checks if a file starts with BOM (Byte Order Mark) character.
1247
   *
1248
   * @param string $file_path <p>Path to a valid file.</p>
1249
   *
1250
   * @throws \RuntimeException if file_get_contents() returned false
1251
   *
1252
   * @return bool
1253
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
1254
   */
1255 2
  public static function file_has_bom(string $file_path): bool
  {
    // Read the raw bytes with PHP's native function (no conversion applied).
    $rawContent = \file_get_contents($file_path);

    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    // The file could not be read.
    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1264
1265
  /**
1266
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1267
   *
1268
   * @param mixed  $var
1269
   * @param int    $normalization_form
1270
   * @param string $leading_combining
1271
   *
1272
   * @return mixed
1273
   */
1274 43
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively, keys are kept.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively, in place.
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string value is passed through untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker so that the isset($n[0]) check below passes.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized string if there is one, otherwise fall back to a forced re-encode.
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1328
1329
  /**
1330
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1331
   *
1332
   * Gets a specific external variable by name and optionally filters it
1333
   *
1334
   * @link  http://php.net/manual/en/function.filter-input.php
1335
   *
1336
   * @param int    $type          <p>
1337
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1338
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1339
   *                              <b>INPUT_ENV</b>.
1340
   *                              </p>
1341
   * @param string $variable_name <p>
1342
   *                              Name of a variable to get.
1343
   *                              </p>
1344
   * @param int    $filter        [optional] <p>
1345
   *                              The ID of the filter to apply. The
1346
   *                              manual page lists the available filters.
1347
   *                              </p>
1348
   * @param mixed  $options       [optional] <p>
1349
   *                              Associative array of options or bitwise disjunction of flags. If filter
1350
   *                              accepts options, flags can be provided in "flags" field of array.
1351
   *                              </p>
1352
   *
1353
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1354
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1355
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1356
   */
1357
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually passed it, so the
    // native function keeps its own default otherwise.
    $rawValue = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($rawValue);
  }
1367
1368
  /**
1369
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1370
   *
1371
   * Gets external variables and optionally filters them
1372
   *
1373
   * @link  http://php.net/manual/en/function.filter-input-array.php
1374
   *
1375
   * @param int   $type       <p>
1376
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1377
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1378
   *                          <b>INPUT_ENV</b>.
1379
   *                          </p>
1380
   * @param mixed $definition [optional] <p>
1381
   *                          An array defining the arguments. A valid key is a string
1382
   *                          containing a variable name and a valid value is either a filter type, or an array
1383
   *                          optionally specifying the filter, flags and options. If the value is an
1384
   *                          array, valid keys are filter which specifies the
1385
   *                          filter type,
1386
   *                          flags which specifies any flags that apply to the
1387
   *                          filter, and options which specifies any options that
1388
   *                          apply to the filter. See the example below for a better understanding.
1389
   *                          </p>
1390
   *                          <p>
1391
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1392
   *                          input array are filtered by this filter.
1393
   *                          </p>
1394
   * @param bool  $add_empty  [optional] <p>
1395
   *                          Add missing keys as <b>NULL</b> to the return value.
1396
   *                          </p>
1397
   *
1398
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1399
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1400
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1401
   *               set and <b>NULL</b> if the filter fails.
1402
   */
1403
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, let the native function use its own defaults.
    $rawValues = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($rawValues);
  }
1413
1414
  /**
1415
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1416
   *
1417
   * Filters a variable with a specified filter
1418
   *
1419
   * @link  http://php.net/manual/en/function.filter-var.php
1420
   *
1421
   * @param mixed $variable <p>
1422
   *                        Value to filter.
1423
   *                        </p>
1424
   * @param int   $filter   [optional] <p>
1425
   *                        The ID of the filter to apply. The
1426
   *                        manual page lists the available filters.
1427
   *                        </p>
1428
   * @param mixed $options  [optional] <p>
1429
   *                        Associative array of options or bitwise disjunction of flags. If filter
1430
   *                        accepts options, flags can be provided in "flags" field of array. For
1431
   *                        the "callback" filter, callable type should be passed. The
1432
   *                        callback must accept one argument, the value to be filtered, and return
1433
   *                        the value after filtering/sanitizing it.
1434
   *                        </p>
1435
   *                        <p>
1436
   *                        <code>
1437
   *                        // for filters that accept options, use this format
1438
   *                        $options = array(
1439
   *                        'options' => array(
1440
   *                        'default' => 3, // value to return if the filter fails
1441
   *                        // other options here
1442
   *                        'min_range' => 0
1443
   *                        ),
1444
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1445
   *                        );
1446
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1447
   *                        // for filter that only accept flags, you can pass them directly
1448
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1449
   *                        // for filter that only accept flags, you can also pass as an array
1450
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1451
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1452
   *                        // callback validate filter
1453
   *                        function foo($value)
1454
   *                        {
1455
   *                        // Expected format: Surname, GivenNames
1456
   *                        if (strpos($value, ", ") === false) return false;
1457
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1458
   *                        $empty = (empty($surname) || empty($givennames));
1459
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1460
   *                        if ($empty || $notstrings) {
1461
   *                        return false;
1462
   *                        } else {
1463
   *                        return $value;
1464
   *                        }
1465
   *                        }
1466
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1467
   *                        </code>
1468
   *                        </p>
1469
   *
1470
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1471
   */
1472 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually passed it.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1482
1483
  /**
1484
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1485
   *
1486
   * Gets multiple variables and optionally filters them
1487
   *
1488
   * @link  http://php.net/manual/en/function.filter-var-array.php
1489
   *
1490
   * @param array $data       <p>
1491
   *                          An array with string keys containing the data to filter.
1492
   *                          </p>
1493
   * @param mixed $definition [optional] <p>
1494
   *                          An array defining the arguments. A valid key is a string
1495
   *                          containing a variable name and a valid value is either a
1496
   *                          filter type, or an
1497
   *                          array optionally specifying the filter, flags and options.
1498
   *                          If the value is an array, valid keys are filter
1499
   *                          which specifies the filter type,
1500
   *                          flags which specifies any flags that apply to the
1501
   *                          filter, and options which specifies any options that
1502
   *                          apply to the filter. See the example below for a better understanding.
1503
   *                          </p>
1504
   *                          <p>
1505
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1506
   *                          input array are filtered by this filter.
1507
   *                          </p>
1508
   * @param bool  $add_empty  [optional] <p>
1509
   *                          Add missing keys as <b>NULL</b> to the return value.
1510
   *                          </p>
1511
   *
1512
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1513
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1514
   */
1515 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, let the native function use its own defaults.
    $filtered = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
1525
1526
  /**
1527
   * Checks whether finfo is available on the server.
1528
   *
1529
   * @return bool
1530
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1531
   */
1532
  public static function finfo_loaded(): bool
  {
    // The fileinfo extension is detected through its "finfo" class.
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1536
1537
  /**
1538
   * Returns the first $n characters of the string.
1539
   *
1540
   * @param string $str      <p>The input string.</p>
1541
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1542
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1543
   *
1544
   * @return string
1545
   */
1546 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A non-positive count yields nothing.
    if ($n <= 0) {
      return '';
    }

    $leadingChars = self::substr($str, 0, $n, $encoding);

    // A failed substr() (false) is mapped to an empty string.
    return $leadingChars === false ? '' : $leadingChars;
  }
1559
1560
  /**
1561
   * Check if the number of unicode characters are not more than the specified integer.
1562
   *
1563
   * @param string $str      The original string to be checked.
1564
   * @param int    $box_size The size in number of chars to be checked against string.
1565
   *
1566
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1567
   */
1568 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length as reported by the class's own strlen().
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1572
1573
  /**
1574
   * @param string $str
1575
   * @param bool   $useLower <p>Use uppercase by default, otherwise use lowercase.</p>
1576
   *
1577
   * @return string
1578
   */
1579 22
  private static function fixStrCaseHelper(string $str, $useLower = false): string
  {
    // The one character pair this helper converts in either direction.
    $capitalSharpS = 'ẞ';
    $smallSharpS = 'ß';

    // Only a strict boolean true switches to lowercase.
    if ($useLower === true) {
      return \str_replace($capitalSharpS, $smallSharpS, $str);
    }

    return \str_replace($smallSharpS, $capitalSharpS, $str);
  }
1605
1606
  /**
1607
   * Try to fix simple broken UTF-8 strings.
1608
   *
1609
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1610
   *
1611
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1612
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1613
   * See: http://en.wikipedia.org/wiki/Windows-1252
1614
   *
1615
   * @param string $str <p>The input string</p>
1616
   *
1617
   * @return string
1618
   */
1619 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replace lists, built once and then reused on every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // Load the mapping table on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      // Keys are the sequences to search for, values their replacements.
      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1640
1641
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept). A string is
   * decoded and re-encoded repeatedly until one more round no longer changes it.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str)) {
      $fixed = [];
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $current = (string)$str;

    // Repeat until a decode/encode round-trip is a fixed point.
    for ($previous = ''; $previous !== $current;) {
      $previous = $current;
      $decoded = self::utf8_decode($current, true);
      $current = self::to_utf8($decoded);
    }

    return $current;
  }
1671
1672
  /**
   * Get the text direction of a single character.
   *
   * When self::$SUPPORT['intlChar'] is true the direction code returned by
   * IntlChar::charDirection() is consulted first. If that does not yield a decision
   * (or intl is not available) a built-in table of right-to-left code points is used.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class; LTR is tested before RTL
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of U+05BE..U+10B7F is treated as left-to-right.
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    // Table entries: an int is a single code point (strict match),
    // a two-element array is an inclusive [from, to] range.
    if ($c <= 0x85e) {
      $rtlTable = [
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          0x608,
          0x60b,
          0x60d,
          0x61b,
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          0x710,
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          0x7b1,
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          0x7fa,
          [0x800, 0x815],
          0x81a,
          0x824,
          0x828,
          [0x830, 0x83e],
          [0x840, 0x858],
          0x85e,
      ];
    } elseif ($c === 0x200f) {
      return 'RTL';
    } elseif ($c >= 0xfb1d) {
      $rtlTable = [
          0xfb1d,
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          0xfb3e,
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          0x10808,
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          0x1083c,
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          0x1093f,
          0x10a00,
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];
    } else {
      // Between U+085F and U+FB1C only U+200F is right-to-left.
      return 'LTR';
    }

    foreach ($rtlTable as $entry) {
      if (\is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1794
1795
  /**
   * Load a data set from "/data/<file>.php" (relative to this class file).
   *
   * The file is pulled in with "require", so the result is whatever that file returns.
   *
   * @param string $file <p>Base name of the data file, without directory and extension.</p>
   *
   * @return mixed|false Will return false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1812
1813
  /**
   * Check for php-support.
   *
   * Runs the support detection once (lazily) and then answers from self::$SUPPORT.
   *
   * @param string|null $key
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
    if (!$alreadyChecked) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) keys yield null.
    return self::$SUPPORT[$key] ?? null;
  }
1839
1840
  /**
   * Build a random string by picking characters from a pool.
   *
   * Each character index is drawn with random_int(); if that throws an \Exception the
   * index is drawn with mt_rand() instead. An empty pool results in an empty string.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    // append one randomly chosen char per iteration
    for ($count = 0; $count < $length; $count++) {
      try {
        $index = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1873
1874
  /**
   * Create a unique identifier string.
   *
   * The seed is built from mt_rand(), the session id, the remote / server address
   * (if present in $_SERVER) and the extra entropy; it is used as prefix for uniqid().
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $seed = \mt_rand() . \session_id();
    $seed .= $_SERVER['REMOTE_ADDR'] ?? '';
    $seed .= $_SERVER['SERVER_ADDR'] ?? '';
    $seed .= $entropyExtra;

    $unique = \uniqid($seed, true);

    return $md5 ? \md5($unique . $seed) : $unique;
  }
1896
1897
  /**
   * Alias for "UTF8::string_has_bom()".
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1912
1913
  /**
   * Check whether the string contains at least one lower case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
1924
1925
  /**
   * Check whether the string contains at least one upper case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
1936
1937
  /**
   * Convert a hexadecimal value into an UTF-8 character.
   *
   * The value is converted with hexdec() and handed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    $decimal = \hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
1948
1949
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "U+" or "\u"
   * (case-insensitive), e.g. "00f1", "U+00f1" or "\u00f1". Anything else - including
   * strings containing non-hexadecimal letters - is rejected with false.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Only real hex digits are allowed: intval(..., 16) would silently stop at the
    // first non-hex character ("zzzz" => 0, "12gh" => 18) instead of failing.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1973
1974
  /**
   * Alias for "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1989
1990
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * Uses mb_encode_numericentity() when that function exists; otherwise every character
   * is encoded on its own via UTF8::single_chr_html_encode().
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the conversion range starts above the ASCII block.
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // A convmap consists of groups of exactly four integers:
      // [start code, end code, offset, mask]. A trailing fifth element makes
      // the map invalid (PHP 8 rejects it with a ValueError).
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode char by char.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2036
2037
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here.
   *
   * Convert all HTML entities to their applicable characters. Decoding is repeated
   * until the string no longer changes, so multiple-encoded entities are resolved too.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle
   *                         quotes and which document type to use. If null is given,
   *                         ENT_QUOTES | ENT_HTML5 is used.
   *                         <ul>
   *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                         </ul>
   *                         </p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The decoded string.
   */
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Nothing to do without "&", or without both "&#" and ";".
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // self::$SUPPORT is filled lazily; make sure it is initialised before reading it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    do {
      $str_compare = $str;

      // Decode decimal entities, but leave entities that would decode to a quote untouched
      // (those are left to the flag-aware html_entity_decode() call below).
      // preg_replace_callback() returns null on a PCRE error: keep the current string then.
      $str = \preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            // always fallback via symfony polyfill
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      ) ?? $str;

      // Append the missing semicolon to numeric entities so that they can be decoded;
      // on a PCRE error (null) fall back to the current string.
      $strWithSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str) ?? $str;

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $strWithSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2177
2178
  /**
   * Create an html-escaped version of the string via "UTF8::htmlspecialchars()",
   * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2194
2195
  /**
   * Remove empty html-tags (tags that contain nothing but optional whitespace).
   *
   * e.g.: <tag></tag>
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";
    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2212
2213
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * After the native htmlentities() call, backslashes are replaced by "&#92;" and the
   * result is passed through UTF8::html_encode() with ASCII chars kept.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>The input string.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle
   *                              quotes, invalid code unit sequences and the used document type.
   *                              The default is ENT_COMPAT.
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
   *                              returning an empty string. Using this flag is discouraged as it may have
   *                              security implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
   *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                              returning an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
   *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
   *                              to ensure the well-formedness of XML documents with embedded external
   *                              content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Like <b>htmlspecialchars</b>,
   *                              <b>htmlentities</b> takes an optional third argument
   *                              <i>encoding</i> which defines encoding used in
   *                              conversion.
   *                              Although this argument is technically optional, you are highly
   *                              encouraged to specify the correct value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string.
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP doesn't replace a backslash with its html entity, since the backslash is
     * mostly used to escape characters when inserting into a database. With a decent
     * database layer that escaping is not needed, so the backslash is replaced by
     * its html entity equivalent here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    return self::html_encode($encoded, true, $encoding);
  }
2336
2337
  /**
2338
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2339
   *
2340
   * INFO: Take a look at "UTF8::htmlentities()"
2341
   *
2342
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2343
   *
2344
   * @param string $str           <p>
2345
   *                              The string being converted.
2346
   *                              </p>
2347
   * @param int    $flags         [optional] <p>
2348
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2349
   *                              invalid code unit sequences and the used document type. The default is
2350
   *                              ENT_COMPAT | ENT_HTML401.
2351
   *                              <table>
2352
   *                              Available <i>flags</i> constants
2353
   *                              <tr valign="top">
2354
   *                              <td>Constant Name</td>
2355
   *                              <td>Description</td>
2356
   *                              </tr>
2357
   *                              <tr valign="top">
2358
   *                              <td><b>ENT_COMPAT</b></td>
2359
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_QUOTES</b></td>
2363
   *                              <td>Will convert both double and single quotes.</td>
2364
   *                              </tr>
2365
   *                              <tr valign="top">
2366
   *                              <td><b>ENT_NOQUOTES</b></td>
2367
   *                              <td>Will leave both double and single quotes unconverted.</td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_IGNORE</b></td>
2371
   *                              <td>
2372
   *                              Silently discard invalid code unit sequences instead of returning
2373
   *                              an empty string. Using this flag is discouraged as it
2374
   *                              may have security implications.
2375
   *                              </td>
2376
   *                              </tr>
2377
   *                              <tr valign="top">
2378
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2379
   *                              <td>
2380
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2381
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2382
   *                              </td>
2383
   *                              </tr>
2384
   *                              <tr valign="top">
2385
   *                              <td><b>ENT_DISALLOWED</b></td>
2386
   *                              <td>
2387
   *                              Replace invalid code points for the given document type with a
2388
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2389
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2390
   *                              instance, to ensure the well-formedness of XML documents with
2391
   *                              embedded external content.
2392
   *                              </td>
2393
   *                              </tr>
2394
   *                              <tr valign="top">
2395
   *                              <td><b>ENT_HTML401</b></td>
2396
   *                              <td>
2397
   *                              Handle code as HTML 4.01.
2398
   *                              </td>
2399
   *                              </tr>
2400
   *                              <tr valign="top">
2401
   *                              <td><b>ENT_XML1</b></td>
2402
   *                              <td>
2403
   *                              Handle code as XML 1.
2404
   *                              </td>
2405
   *                              </tr>
2406
   *                              <tr valign="top">
2407
   *                              <td><b>ENT_XHTML</b></td>
2408
   *                              <td>
2409
   *                              Handle code as XHTML.
2410
   *                              </td>
2411
   *                              </tr>
2412
   *                              <tr valign="top">
2413
   *                              <td><b>ENT_HTML5</b></td>
2414
   *                              <td>
2415
   *                              Handle code as HTML 5.
2416
   *                              </td>
2417
   *                              </tr>
2418
   *                              </table>
2419
   *                              </p>
2420
   * @param string $encoding      [optional] <p>
2421
   *                              Defines encoding used in conversion.
2422
   *                              </p>
2423
   *                              <p>
2424
   *                              For the purposes of this function, the encodings
2425
   *                              ISO-8859-1, ISO-8859-15,
2426
   *                              UTF-8, cp866,
2427
   *                              cp1251, cp1252, and
2428
   *                              KOI8-R are effectively equivalent, provided the
2429
   *                              <i>string</i> itself is valid for the encoding, as
2430
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2431
   *                              the same positions in all of these encodings.
2432
   *                              </p>
2433
   * @param bool   $double_encode [optional] <p>
2434
   *                              When <i>double_encode</i> is turned off PHP will not
2435
   *                              encode existing html entities, the default is to convert everything.
2436
   *                              </p>
2437
   *
2438
   * @return string The converted string.
   *                <p>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i>, an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flag is set.
   *                </p>
2445
   */
2446 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are handed to PHP untouched; any other encoding
    // name is first run through normalize_encoding() with 'UTF-8' as its
    // second argument (presumably the fallback -- confirm against that method).
    $isPassThroughEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if ($isPassThroughEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2454
2455
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so it is returned directly.
    return \extension_loaded('iconv');
  }
2465
2466
  /**
   * Alias of "UTF8::decimal_to_chr()": forwards the argument unchanged and
   * returns whatever that method returns.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed to UTF8::decimal_to_chr().</p>
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2479
2480
  /**
   * Converts an integer to its hexadecimal code point notation (e.g. "U+0041").
   *
   * The hexadecimal digits come from dechex() (lower case) and are left-padded
   * with zeros to at least four digits; longer values are not truncated.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the digits, "U+" by default.</p>
   *
   * @return string The prefix followed by at least four hexadecimal digits.
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2498
2499
  /**
   * Reports whether the "IntlChar" class exists.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2509
2510
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2520
2521
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string handed to UTF8::is_ascii().</p>
   *
   * @return bool The result of UTF8::is_ascii().
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    return self::is_ascii($str);
  }
2536
2537
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The value handed to UTF8::is_base64().</p>
   *
   * @return bool The result of UTF8::is_base64().
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str): bool
  {
    return self::is_base64($str);
  }
2552
2553
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str    <p>The value handed to UTF8::is_binary().</p>
   * @param bool  $strict <p>Passed through as the "$strict" argument of UTF8::is_binary().</p>
   *
   * @return bool The result of UTF8::is_binary().
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    return self::is_binary($str, $strict);
  }
2569
2570
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string handed to UTF8::is_bom().</p>
   *
   * @return bool The result of UTF8::is_bom().
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    return self::is_bom($utf8_chr);
  }
2585
2586
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The string handed to UTF8::is_html().</p>
   *
   * @return bool The result of UTF8::is_html().
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    return self::is_html($str);
  }
2601
2602
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The string handed to UTF8::is_json().</p>
   *
   * @return bool The result of UTF8::is_json().
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    return self::is_json($str);
  }
2617
2618
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param mixed $str <p>The value handed to UTF8::is_utf16().</p>
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2636
2637
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param mixed $str <p>The value handed to UTF8::is_utf32().</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2655
2656
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string|string[] $str    <p>The value handed to UTF8::is_utf8().</p>
   * @param bool            $strict <p>Passed through as the "$strict" argument of UTF8::is_utf8().</p>
   *
   * @return bool The result of UTF8::is_utf8().
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    return self::is_utf8($str, $strict);
  }
2672
2673
  /**
   * Tests the string against the pattern "^[[:alpha:]]*$", i.e. whether it
   * consists of alphabetic chars only.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of UTF8::str_matches_pattern() for that pattern.
   */
  public static function is_alpha(string $str): bool
  {
    $alphaOnly = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnly);
  }
2685
2686
  /**
   * Tests the string against the pattern "^[[:alnum:]]*$", i.e. whether it
   * consists of alphabetic and numeric chars only.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of UTF8::str_matches_pattern() for that pattern.
   */
  public static function is_alphanumeric(string $str): bool
  {
    $alnumOnly = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnly);
  }
2698
2699
  /**
   * Checks whether a string consists only of the ASCII bytes this helper accepts.
   *
   * Accepted are the printable range 0x20-0x7E plus the single bytes
   * 0x09, 0x0A, 0x0D, 0x10 and 0x13. Every other byte (e.g. NUL, DEL or any
   * byte >= 0x80) makes the result <strong>false</strong>.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if only accepted bytes occur (or the string is empty)<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // NOTE(review): \x10 and \x13 look like decimal 10 / 13 (LF / CR) written
    // as hex; LF and CR are already covered by \x0A and \x0D. Confirm whether
    // DLE (0x10) and DC3 (0x13) are meant to be accepted before changing this.
    $foundOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOtherByte;
  }
2717
2718
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input counts as base64 when it decodes in strict mode to a non-empty
   * value and re-encoding that value reproduces the input exactly.
   *
   * @param string $str <p>The input string; any non-string value yields <strong>false</strong>.</p>
   *
   * @return bool Whether or not $str is base64 encoded.
   */
  public static function is_base64($str): bool
  {
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on truthiness:
    // the decoded payload "0" (from "MA==") is falsy in PHP but perfectly valid.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
2739
2740
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input is cast to string and then classified in this order:
   * - an empty string is not binary,
   * - a string matching "^[01]+$" is binary,
   * - a string in which more than 30% of the bytes are NUL is binary,
   * - non-strict mode: binary if at least one NUL byte occurs,
   * - strict mode: binary if finfo reports the MIME encoding "binary".
   *
   * @param mixed $input  <p>The value to inspect (cast to string).</p>
   * @param bool  $strict <p>Use ext-fileinfo instead of the plain NUL-byte check.</p>
   *
   * @throws \RuntimeException in strict mode if ext-fileinfo is flagged as unavailable
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // $input is non-empty here, so the division below is always safe.
    $nullBytes = \substr_count($input, "\x0");
    if (($nullBytes / \strlen($input)) > 0.3) {
      return true;
    }

    if ($strict !== true) {
      return $nullBytes > 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(FILEINFO_MIME_ENCODING);

    return $detector->buffer($input) === 'binary';
  }
2793
2794
  /**
   * Check if the file is binary.
   *
   * Reads up to 512 bytes from the start of the file and runs them through
   * UTF8::is_binary() in strict mode. If nothing could be read (the file
   * could not be opened, or the read yielded an empty string) the result is
   * <strong>false</strong>.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $head = '';

    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
      $head = \fread($handle, 512);
      \fclose($handle);
    }

    return $head !== '' && self::is_binary($head, true);
  }
2818
2819
  /**
   * Tests the string against the pattern "^[[:space:]]*$", i.e. whether it
   * consists of whitespace chars only.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of UTF8::str_matches_pattern() for that pattern.
   */
  public static function is_blank(string $str): bool
  {
    $whitespaceOnly = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $whitespaceOnly);
  }
2831
2832
  /**
   * Checks if the given string is identical to one of the known "Byte Order Mark" strings.
   *
   * The whole value is compared (strictly) against every key of self::$BOM;
   * a string that merely starts with a BOM does not match.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2852
2853
  /**
   * Determine whether the value is considered to be empty.
   *
   * A value is considered empty if it is loosely equal to FALSE
   * (e.g. "", "0", 0, null, false or an empty array).
   *
   * @param mixed $str
   *
   * @return bool Whether or not $str is empty().
   */
  public static function is_empty($str): bool
  {
    // For an always-defined parameter, empty($str) is the same as !$str.
    return !$str;
  }
2867
2868
  /**
   * Tests the string against the pattern "^[[:xdigit:]]*$", i.e. whether it
   * consists of hexadecimal chars only.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               The result of UTF8::str_matches_pattern() for that pattern.
   */
  public static function is_hexadecimal(string $str): bool
  {
    $hexOnly = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnly);
  }
2880
2881
  /**
   * Check if the string contains at least one html-tag, e.g. "<b>", "</b>" or "<br />".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if a tag-like sequence was found, <strong>false</strong> otherwise
   *              (also for the empty string).
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Opening or closing tag, optional attributes (bare, quoted or unquoted
    // values) and an optional self-closing slash.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    return \preg_match($tagPattern, $str) === 1;
  }
2901
2902
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that UTF8::json_decode() turns into an object or an array,
   * without json_last_error() reporting a problem, counts as JSON here;
   * the empty string never does.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException if ext-json is flagged as unavailable
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    $isContainer = \is_object($decoded) === true || \is_array($decoded) === true;
    if ($isContainer === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
2934
2935
  /**
   * Tests the string against the pattern "^[[:lower:]]*$", i.e. whether it
   * consists of lower case chars only.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only lower case characters.
   */
  public static function is_lowercase(string $str): bool
  {
    $lowerOnly = '^[[:lower:]]*$';

    return self::str_matches_pattern($str, $lowerOnly);
  }
2948
2949
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The check works by actually unserializing the input. Object
   * instantiation is disabled ("allowed_classes" => false), so probing a
   * string never runs __wakeup() / __destruct() of application classes;
   * serialized objects are still recognized as serialized.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized.
   */
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, so that one payload has to be recognized up front.
    if ($str === 'b:0;') {
      return true;
    }

    // The "@" stays: unserialize() reports malformed input via a notice /
    // warning, and malformed input is the expected case here.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2968
2969
  /**
   * Tests the string against the pattern "^[[:upper:]]*$", i.e. whether it
   * consists of upper case chars only.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    $upperOnly = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $upperOnly);
  }
2982
2983
  /**
   * Check if the string is UTF-16.
   *
   * Heuristic: input that UTF8::is_binary() does not flag is rejected right
   * away. Otherwise the BOM is removed and, for UTF-16LE and then UTF-16BE,
   * the input is converted to UTF-8, back to the candidate encoding and to
   * UTF-8 again. If that round trip is stable, a score is built from the
   * entries of UTF8::count_chars() for the converted text that are found
   * (strict in_array()) in UTF8::count_chars() of the input. The candidate
   * with the higher score wins; equal scores mean "not UTF-16".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // Make sure the support flags exist before reading them, as the other
    // methods of this class do; otherwise the lookup below hits an undefined
    // index and the warning can never fire.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // count_chars() of the input, computed lazily and shared by both candidates.
    $strChars = [];
    $scores = [];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3055
3056
  /**
   * Check if the string is UTF-32.
   *
   * Heuristic: input that UTF8::is_binary() does not flag is rejected right
   * away. Otherwise the BOM is removed and, for UTF-32LE and then UTF-32BE,
   * the input is converted to UTF-8, back to the candidate encoding and to
   * UTF-8 again. If that round trip is stable, a score is built from the
   * entries of UTF8::count_chars() for the converted text that are found
   * (strict in_array()) in UTF8::count_chars() of the input. The candidate
   * with the higher score wins; equal scores mean "not UTF-32".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // Make sure the support flags exist before reading them, as the other
    // methods of this class do; otherwise the lookup below hits an undefined
    // index and the warning can never fire.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // count_chars() of the input, computed lazily and shared by both candidates.
    $strChars = [];
    $scores = [];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3128
3129
  /**
3130
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3131
   *
3132
   * @see    http://hsivonen.iki.fi/php-utf8/
3133
   *
3134
   * @param string|string[] $str    <p>The string to be checked.</p>
3135
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3136
   *
3137
   * @return bool
3138
   */
3139 107
  public static function is_utf8($str, bool $strict = false): bool
3140
  {
3141 107
    if (\is_array($str) === true) {
3142 2
      foreach ($str as $k => $v) {
3143 2
        if (false === self::is_utf8($v, $strict)) {
3144 2
          return false;
3145
        }
3146
      }
3147
3148
      return true;
3149
    }
3150
3151 107
    if ('' === $str) {
3152 12
      return true;
3153
    }
3154
3155 103
    if ($strict === true) {
3156 2
      if (self::is_utf16($str) !== false) {
3157 2
        return false;
3158
      }
3159
3160
      if (self::is_utf32($str) !== false) {
3161
        return false;
3162
      }
3163
    }
3164
3165 103
    if (self::pcre_utf8_support() !== true) {
3166
3167
      // If even just the first character can be matched, when the /u
3168
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3169
      // invalid, nothing at all will match, even if the string contains
3170
      // some valid sequences
3171
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3172
    }
3173
3174 103
    $mState = 0; // cached expected number of octets after the current octet
3175
    // until the beginning of the next UTF8 character sequence
3176 103
    $mUcs4 = 0; // cached Unicode character
3177 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3178
3179 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3180
      self::checkForSupport();
3181
    }
3182
3183 103
    if (self::$ORD === null) {
3184
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3185
    }
3186
3187 103
    $len = self::strlen_in_byte((string)$str);
3188
    /** @noinspection ForeachInvariantsInspection */
3189 103
    for ($i = 0; $i < $len; $i++) {
3190 103
      $in = self::$ORD[$str[$i]];
3191 103
      if ($mState === 0) {
3192
        // When mState is zero we expect either a US-ASCII character or a
3193
        // multi-octet sequence.
3194 103
        if (0 === (0x80 & $in)) {
3195
          // US-ASCII, pass straight through.
3196 98
          $mBytes = 1;
3197 84
        } elseif (0xC0 === (0xE0 & $in)) {
3198
          // First octet of 2 octet sequence.
3199 75
          $mUcs4 = $in;
3200 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3201 75
          $mState = 1;
3202 75
          $mBytes = 2;
3203 58
        } elseif (0xE0 === (0xF0 & $in)) {
3204
          // First octet of 3 octet sequence.
3205 41
          $mUcs4 = $in;
3206 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3207 41
          $mState = 2;
3208 41
          $mBytes = 3;
3209 30
        } elseif (0xF0 === (0xF8 & $in)) {
3210
          // First octet of 4 octet sequence.
3211 19
          $mUcs4 = $in;
3212 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3213 19
          $mState = 3;
3214 19
          $mBytes = 4;
3215 13
        } elseif (0xF8 === (0xFC & $in)) {
3216
          /* First octet of 5 octet sequence.
3217
          *
3218
          * This is illegal because the encoded codepoint must be either
3219
          * (a) not the shortest form or
3220
          * (b) outside the Unicode range of 0-0x10FFFF.
3221
          * Rather than trying to resynchronize, we will carry on until the end
3222
          * of the sequence and let the later error handling code catch it.
3223
          */
3224 5
          $mUcs4 = $in;
3225 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3226 5
          $mState = 4;
3227 5
          $mBytes = 5;
3228 10
        } elseif (0xFC === (0xFE & $in)) {
3229
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3230 5
          $mUcs4 = $in;
3231 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3232 5
          $mState = 5;
3233 5
          $mBytes = 6;
3234
        } else {
3235
          // Current octet is neither in the US-ASCII range nor a legal first
3236
          // octet of a multi-octet sequence.
3237 103
          return false;
3238
        }
3239
      } else {
3240
        // When mState is non-zero, we expect a continuation of the multi-octet
3241
        // sequence
3242 84
        if (0x80 === (0xC0 & $in)) {
3243
          // Legal continuation.
3244 76
          $shift = ($mState - 1) * 6;
3245 76
          $tmp = $in;
3246 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3247 76
          $mUcs4 |= $tmp;
3248
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3249
          // Unicode code point to be output.
3250 76
          if (0 === --$mState) {
3251
            // Check for illegal sequences and code points.
3252
            //
3253
            // From Unicode 3.1, non-shortest form is illegal
3254
            if (
3255 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3256
                ||
3257 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3258
                ||
3259 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3260
                ||
3261 76
                (4 < $mBytes)
3262
                ||
3263
                // From Unicode 3.2, surrogate characters are illegal.
3264 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3265
                ||
3266
                // Code points outside the Unicode range are illegal.
3267 76
                ($mUcs4 > 0x10FFFF)
3268
            ) {
3269 8
              return false;
3270
            }
3271
            // initialize UTF8 cache
3272 76
            $mState = 0;
3273 76
            $mUcs4 = 0;
3274 76
            $mBytes = 1;
3275
          }
3276
        } else {
3277
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3278
          // Incomplete multi-octet sequence.
3279 36
          return false;
3280
        }
3281
      }
3282
    }
3283
3284 67
    return true;
3285
  }
3286
3287
  /**
   * Decodes a JSON string via PHP's native "json_decode()".
   *
   * The input is first passed through "UTF8::filter()", then handed to the
   * native function together with the remaining arguments.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, forwarded unchanged
   *                        to the native function.
   *                        </p>
   *
   * @throws \RuntimeException if the JSON support flag reports that ext-json is missing
   *
   * @return mixed
   *                Whatever the native "json_decode()" returns for the filtered input:
   *                the decoded value, or <b>NULL</b> if the input cannot be decoded or is
   *                nested deeper than <i>$depth</i>.
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // filter first, exactly like the other helpers in this class do
    $filteredJson = self::filter($json);

    // lazily populate the feature-support table
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without ext-json there is nothing we can delegate to
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3339
3340
  /**
   * Returns the JSON representation of a value via PHP's native "json_encode()".
   *
   * The value is first passed through "UTF8::filter()", then handed to the
   * native function together with the remaining arguments.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of <b>JSON_*</b> encode constants
   *                       (e.g. <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>),
   *                       forwarded unchanged to the native function.
   *                       </p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @throws \RuntimeException if the JSON support flag reports that ext-json is missing
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // filter first, exactly like the other helpers in this class do
    $filteredValue = self::filter($value);

    // lazily populate the feature-support table
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without ext-json there is nothing we can delegate to
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3395
3396
  /**
   * Tells whether JSON support is usable, judged by the existence of the
   * native "json_decode()" function.
   *
   * @return bool
   *              <strong>true</strong> if "json_decode()" exists, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3406
3407
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset passed on to the substr / strtolower helpers.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to the helpers; remove non UTF-8 chars from the string.</p>
   *
   * @return string The lowercased first character followed by the unchanged remainder.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character (the helper may report false)
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // the first character, lowercased
    $head = self::strtolower(
        (string)self::substr($str, 0, 1, $encoding, $cleanUtf8),
        $encoding,
        $cleanUtf8
    );

    return $head . ($tail === false ? '' : $tail);
  }
3431
3432
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowerCasedFirst;
  }
3447
3448
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each non-empty chunk that is
   * not listed in <i>$exceptions</i> is passed through "UTF8::lcfirst()", and the
   * chunks are glued back together without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" would also reject the string "0".
    if ('' === $str) {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip empty chunks only. The chunk "0" is falsy in PHP, but it is real
      // content and must survive, otherwise it would vanish from the result.
      if ('' === (string)$word) {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3497
3498
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowerCasedFirst;
  }
3513
3514
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without <i>$chars</i> the pattern "^[\pZ\pC]+" is used (Unicode separator and
   * "other" categories). Otherwise <i>$chars</i> is escaped with "preg_quote()" and
   * used as a character class.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. INF, null, false, '' and []
   *                      select the default pattern; a numeric zero (0, '0') is a
   *                      regular character to strip.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // INF is the "argument not given" sentinel. Other empty values keep selecting
    // the default as well, but "0" / 0 are falsy in PHP and still a legitimate
    // character to strip, so they must not fall into the default branch.
    $useDefaultPattern = $chars === INF || (!$chars && !\is_numeric($chars));

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = '^[\pZ\pC]+';
    } else {
      // cast: preg_quote() only accepts strings under strict_types
      $pattern = '^[' . \preg_quote((string)$chars, '/') . ']+';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3538
3539
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is concatenated into a single string before the code
   * points are collected.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, or null if no code points were found.
   */
  public static function max($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) > 0) {
      return self::chr(\max($codepoints));
    }

    return null;
  }
3561
3562
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int The largest entry of "UTF8::chr_size_list()", or 0 if that list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    // nothing to measure
    if (\count($sizes) === 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
3579
3580
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding
   * is switched to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3596
3597
  /**
   * Checks whether the mbstring function overloading of the string functions
   * is active on the server.
   *
   * @return bool true only if MB_OVERLOAD_STRING is defined and its bit is set
   *              in the "mbstring.func_overload" ini value
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // without the constant there is no overloading to detect
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3614
3615
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is concatenated into a single string before the code
   * points are collected.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, or null if no code points were found.
   */
  public static function min($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);
    if (\count($codepoints) > 0) {
      return self::chr(\min($codepoints));
    }

    return null;
  }
3637
3638
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding
   * @param mixed $fallback
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3654
3655
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order:
   * 1. empty / "0" / "1" input => <i>$fallback</i>
   * 2. "UTF-8" or "UTF8"       => "UTF-8"
   * 3. a previously resolved name from the per-request static cache
   * 4. a name found verbatim in the known-encodings list
   * 5. the upper-cased name, mapped through the alias table below if it matches
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // getData() can yield false instead of an array; never store that in the
      // property, fall back to an empty list so the lookup below stays valid.
      $encodingsData = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodingsData) ? $encodingsData : [];
    }

    // the is_array() guard also covers a non-array value stored by other code paths
    if (\is_array(self::$ENCODINGS) && \in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // lookup key: upper-cased name without punctuation (whitespace is kept)
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache under the caller's original spelling so the next lookup is a direct hit
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3780
3781
  /**
   * Standardize line ending to unix-like: every "\r\n" and every remaining
   * lone "\r" becomes "\n".
   *
   * @param string $str
   *
   * @return string
   */
  public static function normalize_line_ending(string $str): string
  {
    // strtr() prefers the longest key, so "\r\n" is collapsed as one unit
    // before a lone "\r" is considered.
    $lineEndingMap = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return \strtr($str, $lineEndingMap);
  }
3792
3793
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement table is loaded once from the "utf8_msword" data set; its
   * keys and values are cached in static variables and applied with
   * "str_replace()". If the table cannot be loaded, the string is returned
   * unchanged.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ('' === $str) {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // getData() can yield false instead of an array; never store that in
        // the property, fall back to an empty table.
        $mswordData = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($mswordData) ? $mswordData : [];
      }

      // the is_array() guard also covers a non-array value stored by other code paths
      $replacementTable = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacementTable);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacementTable);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3821
3822
  /**
   * Normalize the whitespace.
   *
   * Every value of "self::$WHITESPACE_TABLE" is replaced by a plain space (' ') and,
   * unless requested otherwise, every value of "self::$BIDI_UNI_CODE_CONTROLS_TABLE" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one search list per "$keepNonBreakingSpace" variant (0 / 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ('' === $str) {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    // the bidi controls are removed before the whitespace gets replaced
    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3864
3865
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per "input + encoding". "IntlChar::ord()" is used when
   * "self::$SUPPORT['intlChar']" is true and it returns a truthy value; otherwise
   * the code point is computed from the first (up to four) bytes.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read (e.g. for an empty string).
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache key is built from the untouched input
    $chrInput = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $chrInput . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $codePoint = \IntlChar::ord($chr);
      if ($codePoint) {
        $CHAR_CACHE[$cacheKey] = $codePoint;

        return $codePoint;
      }
    }

    // fallback: decode the first (max.) four bytes by hand, the array from "unpack" is 1-based
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (int)((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (int)((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (int)((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
      // single byte (or nothing at all)
      $codePoint = $lead;
    }

    $CHAR_CACHE[$cacheKey] = $codePoint;

    return $codePoint;
  }
3934
3935
  /**
   * Parses the string into an array (into the the second parameter).
   *
   * WARNING: Instead of "parse_str()" this method do not (re-)placing variables in the current scope,
   *          if the second parameter is not set!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Pass the string through "UTF8::clean()" before parsing.</p>
   *
   * @return bool <strong>false</strong> if "mb_parse_str()" returned false or $result is empty afterwards,
   *              <strong>true</strong> otherwise.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    // always fallback via symfony polyfill
    $parsed = \mb_parse_str($input, $result);

    return $parsed !== false && !empty($result);
  }
3960
3961
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier against an empty string.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    // "preg_match" gives 1 on a match, 0 on no match and false on error
    return $matched !== false && $matched !== 0;
  }
3973
3974
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved in this order: a string of decimal digits is taken as the
   * code point, a string of hexadecimal digits is converted via "UTF8::hex_to_int()",
   * everything else is passed to "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] <p>Empty if a boundary is falsy or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Resolve one boundary into a code point. Both ctype checks get the string form:
    // non-string arguments are deprecated for "ctype_*()" and integers would be
    // interpreted as ASCII codes instead of as text.
    $toCodePoint = static function ($value): int {
      $asString = (string)$value;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($asString)) {
        return (int)$value;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($asString)) {
        return (int)self::hex_to_int($asString);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4030
4031
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal html entities ("&#xXXXX;")
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

      // one pass only, or stop as soon as a pass does not change anything anymore
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return $str;
  }
4079
4080
  /**
   * Filter a list of strings and return the remaining values as a re-indexed list.
   *
   * @param array    $strings           <p>The strings to filter.</p>
   * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int|null $removeShortValues <p>Drop values with a length less than or equal to this value,
   *                                    null disables the check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $keep = static function ($str) use ($removeEmptyValues, $removeShortValues): bool {
      // the length check comes first, same as the order of the two conditions before
      if ($removeShortValues !== null && self::strlen($str) <= $removeShortValues) {
        return false;
      }

      if ($removeEmptyValues === true && \trim($str) === '') {
        return false;
      }

      return true;
    };

    // "array_filter" keeps the old keys, so re-index the result
    return \array_values(\array_filter($strings, $keep));
  }
4114
4115
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The regex is built as "delimiter + pattern + delimiter + 'u' + options", the pattern
   * itself is used as given (the delimiter is not escaped in it).
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern.</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used, "msr" is used as "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex, a falsy value falls back to '/'.
   *                            Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4145
4146
  /**
   * Alias for "UTF8::remove_bom()": the call is forwarded unchanged.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4161
4162
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$BOM" (bom-string => byte-length) is checked in turn against
   * the start of the string, so more than one entry can be stripped in a single call.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // "CP850" is passed as encoding for both helper calls
      if (self::strpos($str, $bomString, 0, 'CP850') !== 0) {
        continue;
      }

      $rest = self::substr($str, $bomByteLength, null, 'CP850');
      $str = $rest === false ? '' : (string)$rest;
    }

    return $str;
  }
4187
4188
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "  " for ' ') is collapsed into a single
   * occurrence. The search is a literal, byte-wise match.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle can not be duplicated
        if ($item === '') {
          continue;
        }

        // Use the captured group as replacement instead of the raw needle: the raw
        // needle would be parsed as a replacement pattern, so "$0" / "\0" in it would
        // re-insert the whole run instead of a single occurrence.
        $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4211
4212
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
   *                              Default: '' (strip every tag)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4226
4227
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * "\r\n" counts as one break, so it is replaced by a single $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // The pattern is delimited by "#", so there must be no "/" in front of "\r\n":
    // with it, "\r\n" was matched as two separate breaks and a "/" directly before
    // a "\r\n" was removed as well.
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4239
4240
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00", "%1F", ...)
   *                            of these characters. Default: true</p>
   * @param string $replacement [optional] <p>The replacement for each match. Default: ''</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of percent-escapes are case-insensitive ("%0b" === "%0B")
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore: a removal can bring the
    // pieces of a new match together (e.g. "%0%000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4273
4274
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // Static analysis reports that "strlen()" may also return false; the cast keeps
    // the offset an integer (false => 0, which leaves the string as it is).
    $prefixLength = (int)self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4297
4298
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // The lengths are measured in $encoding, so the cut has to use the same
    // encoding (as "remove_left()" does), otherwise it can end at the wrong position.
    return (string)self::substr($str, 0, $keepLength, $encoding);
  }
4320
4321
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Forwards to "UTF8::str_replace()" (case-sensitive) or "UTF8::str_ireplace()" (case-insensitive).
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4339
4340
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * Forwards to "UTF8::str_replace()" (case-sensitive) or "UTF8::str_ireplace()" (case-insensitive).
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4358
4359
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is handed over as "none" to "mb_substitute_character()"
      // NOTE(review): other values are passed through as they are - confirm that
      // "mb_substitute_character()" accepts them on the targeted PHP versions
      $replacementCharHelper = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      // (the substitute character is changed only for this one conversion)
      $save = \mb_substitute_character();
      \mb_substitute_character($replacementCharHelper);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    $diamonds = [
        "\xEF\xBF\xBD",
        '�',
    ];

    return str_replace($diamonds, $replacementChar, $str);
  }
4415
4416
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars every trailing character of the unicode categories "Z" (separators)
   * and "C" (control / other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy in PHP, but it is a valid character to strip
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = "[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      // "\z" only matches at the very end, "$" would also match in front of a trailing newline
      $pattern = "[$chars]+\\z";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4440
4441
  /**
   * Build (and cache) a regex fragment out of the characters of $s.
   *
   * Characters are collected into one bracket expression ("[...]") that starts with $class;
   * parts with three or more bytes that are not reported as a single character
   * by "UTF8::strlen()" become alternatives of a non-capturing group instead.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    $bracketContent = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // goes to the front of the bracket expression
        $bracketContent = '-' . $bracketContent;
      } elseif (!isset($piece[2])) {
        // one or two bytes
        $bracketContent .= \preg_quote($piece, '/');
      } elseif (1 === self::strlen($piece)) {
        $bracketContent .= $piece;
      } else {
        $alternatives[] = $piece;
      }
    }

    if ($bracketContent) {
      $bracketContent = '[' . $bracketContent . ']';
    }

    if ($alternatives === []) {
      $return = $bracketContent;
    } else {
      \array_unshift($alternatives, $bracketContent);
      $return = '(?:' . \implode('|', $alternatives) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4489
4490
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of "self::$SUPPORT" as "key - value" line inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $lines = '';
    foreach (self::$SUPPORT as $key => $value) {
      $lines .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }

    echo '<pre>' . $lines . '</pre>';
  }
4505
4506
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity, the unchanged char if it is ASCII and should be kept,
   *                or an empty string for an empty input.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are passed on as they are, everything else is normalized first
    $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    $charset = $needsNormalizing ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    return '&#' . self::ord($char, $charset) . ';';
  }
4535
4536
  /**
   * Replace every run of $tabLength spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4546
4547
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via "preg_match_all('/./us')"; otherwise
   * the bytes are grouped by hand into 1-4 byte sequences, and bytes that do not
   * fit one of these patterns are left out of the result.
   *
   * @param string|int $str       <p>The string to split into array.</p>
   * @param int        $length    [optional] <p>Max character length of each array element.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = [];

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // 0xxxxxxx: single byte
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;

          continue;
        }

        // 110xxxxx 10xxxxxx: two bytes
        if (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

          continue;
        }

        // 1110xxxx 10xxxxxx 10xxxxxx: three bytes
        if (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

          continue;
        }

        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four bytes
        if (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // glue every $length characters together into one element
      $chunks = [];
      foreach (\array_chunk($chars, $length) as $group) {
        $chunks[] = \implode('', $group);
      }

      return $chunks;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4667
4668
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are dropped
    $str = (string)\preg_replace('/^[-_]+/', '', $str);

    // a run of separators is removed, the character after it (if any) gets upper-cased
    $upperAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    // digits stay, the character after them (if any) gets upper-cased
    $upperAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);
  }
4705
4706
  /**
   * Capitalizes the first letter of every word of a name, leaving alone the
   * words / prefixes that the name helper treats as special cases.
   *
   * Whitespace is collapsed first; the helper is then applied to the parts
   * separated by spaces and afterwards to the parts separated by dashes.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $bySpace = self::str_capitalize_name_helper(self::collapse_whitespace($str), ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
4723
4724
  /**
   * Capitalizes the parts of a personal name that are separated by $delimiter,
   * e.g. "marcus aurelius" => "Marcus Aurelius".
   *
   * A part is left untouched when
   * - it is exactly one of the lower-case particles listed in "names", or
   * - it starts with one of the "prefixes", or
   * - the delimiter is "-" and it starts with one of the "names".
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator between the parts; an empty delimiter yields an empty string.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // "explode()" cannot split on an empty delimiter (warning + false in PHP 7, ValueError in PHP 8),
    // keep the previous result for that case without triggering the error
    if ('' === $delimiter) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // beginnings that prevent capitalization; the "names" only count as beginnings for dash-separated parts
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
    }

    $parts = \explode($delimiter, $names);

    // iterate by key instead of by reference, so no dangling reference is left behind
    foreach ($parts as $key => $part) {
      if (\in_array($part, $specialCases['names'], true)) {
        continue;
      }

      foreach ($protectedBeginnings as $beginning) {
        if (self::strpos($part, $beginning, 0, $encoding) === 0) {
          // first hit is enough, skip this part
          continue 2;
        }
      }

      $parts[$key] = self::str_upper_first($part);
    }

    return \implode($delimiter, $parts);
  }
4812
4813
  /**
   * Checks whether $haystack contains $needle.
   *
   * The comparison is case-sensitive unless $caseSensitive is false. An empty
   * haystack or an empty needle always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // backward-compatibility fallback: a non-boolean third argument is used as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = $caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4842
4843
  /**
   * Checks whether $haystack contains every one of the $needles.
   *
   * The comparison is case-sensitive unless $caseSensitive is false. An empty
   * haystack or an empty list of needles yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || [] === $needles) {
      return false;
    }

    // backward-compatibility fallback: a non-boolean third argument is used as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = $caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
4878
4879
  /**
   * Checks whether $haystack contains at least one of the $needles.
   *
   * The comparison is case-sensitive unless $caseSensitive is false. An empty
   * list of needles yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        return true;
      }
    }

    return false;
  }
4906
4907
  /**
   * Shortcut for "str_delimit()" with a dash as the delimiter: returns a
   * lowercase and trimmed string whose parts are separated by dashes.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
4921
4922
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * Uppercase detection uses the Unicode property "Lu", so non-ASCII uppercase
   * letters (e.g. "Ä") start a new part as well. The delimiter is always
   * inserted literally, even if it contains "$" or "\".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every uppercase letter that is not at a word boundary with a leading dash;
    // \p{Lu} instead of [A-Z] so that non-ASCII uppercase letters are handled too
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // split + implode instead of preg_replace(), so that "$1" / "\1" inside the
    // delimiter is not interpreted as a back-reference
    $parts = \preg_split('/[-_\s]+/u', $str);
    if ($parts === false) {
      // regex failure (e.g. invalid UTF-8): same empty result the failed replace produced before
      return '';
    }

    return \implode($delimiter, $parts);
  }
4944
4945
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order:
   * 1.) binary input: only UTF-16 / UTF-32 (LE / BE) are recognized, everything else is "false"
   * 2.) ASCII
   * 3.) UTF-8
   * 4.) "mb_detect_encoding()" in strict mode with a fixed list of encodings
   * 5.) "iconv()" round-trip check against the encodings from the "encodings" data file
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // run each check only once and reuse the result for the LE / BE decision
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    // always fallback via symfony polyfill
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // "getData()" may return false; store an empty list then, so the loop below stays valid
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      // an encoding matches if converting the string to itself leaves it unchanged;
      // the strings are compared directly, hashing them first adds nothing
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5066
5067
  /**
   * Checks (case-sensitive, byte-wise) whether $haystack ends with $needle.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    return '' !== $haystack
           &&
           '' !== $needle
           &&
           \substr($haystack, -\strlen($needle)) === $needle;
  }
5083
5084
  /**
   * Checks whether the string ends with at least one of the $substrings.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5108
5109
  /**
   * Makes sure the string begins with $substring: the string is returned
   * unchanged if "str_starts_with()" reports a match, otherwise $substring
   * is prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
5126
5127
  /**
   * Makes sure the string ends with $substring: the string is returned
   * unchanged if "str_ends_with()" reports a match, otherwise $substring
   * is appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
5143
5144
  /**
   * Makes an identifier readable: strips '_id', replaces the remaining
   * underscores with spaces, trims the result and upper-cases its first
   * character.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // search => replacement, applied in this order
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $str = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    $trimmed = self::trim($str);

    return self::ucfirst($trimmed);
  }
5168
5169
  /**
   * Checks whether $haystack ends with $needle, ignoring case.
   *
   * The tail of the haystack is cut with the byte length of the needle and
   * then compared via "strcasecmp()". An empty haystack or an empty needle
   * always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5189
5190
  /**
   * Checks whether the string ends with at least one of the $substrings.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5214
5215
  /**
   * Case-insensitive search for the first occurrence of $needle; thin wrapper
   * around "stripos()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5237
5238
  /**
   * Case-insensitive search for the last occurrence of $needle; thin wrapper
   * around "strripos()". The offset is passed through unchanged, so negative
   * offsets are handled by "strripos()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5261
5262
  /**
   * Case-sensitive search for the first occurrence of $needle; thin wrapper
   * around "strpos()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5284
5285
  /**
   * Case-sensitive search for the last occurrence of $needle; thin wrapper
   * around "strrpos()". The offset is passed through unchanged, so negative
   * offsets are handled by "strrpos()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5308
5309
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged if $index lies behind its end or if its
   * length cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // "strlen()" may report false; without a usable length there is nothing sensible to split
    if ($len === false || $index > $len) {
      return $str;
    }

    // "substr()" may report false as well, normalize both halves to strings before joining
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5332
5333
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Implemented via "preg_replace()" with quoted search terms; the replacement
   * is escaped as well, so "$" and "\" are inserted literally instead of being
   * interpreted as regex back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search term never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string or an array of replacement strings.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive unicode pattern per search term
    $patterns = [];
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // pattern that can never match, an empty search term must not replace anything
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($term, '/') . '/ui';
      }
    }

    // "preg_replace()" would treat "$1", "\1", "${1}" ... in the replacement as back-references
    $escapeReplacement = static function ($value) {
      return \addcslashes((string)$value, '\\$');
    };
    $replacement = \is_array($replace)
        ? \array_map($escapeReplacement, $replace)
        : $escapeReplacement($replace);

    $replacedCount = 0;
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
5376
5377
  /**
   * Checks whether $haystack starts with $needle, ignoring case.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::stripos($haystack, $needle) === 0;
  }
5397
5398
  /**
   * Checks whether the string begins with at least one of the $substrings.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ('' === $str || [] === $substrings) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches === true) {
        return true;
      }
    }

    return false;
  }
5426
5427
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if the input or the separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // search with the same encoding that is used for cutting below, otherwise the offset may not match
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // start right behind the separator and take everything up to the end
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5458
5459
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if the input or the separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // search with the same encoding that is used for cutting below, otherwise the offset may not match
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // start right behind the separator and take everything up to the end
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5490
5491
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if the input or the separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // search with the same encoding that is used for cutting below, otherwise the offset may not match
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5517
5518
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if the input or the separator is empty, or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // search with the same encoding that is used for cutting below, otherwise the offset may not match
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5544
5545
  /**
   * Gets the part of the string that "stristr()" returns for the "$needle":
   * the substring after (or before via "$beforeNeedle") its first occurrence.
   *
   * An empty string is returned if the input or the needle is empty, or if
   * "stristr()" reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $needle || '' === $str) {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return (false === $part) ? '' : $part;
  }
5577
5578
  /**
   * Gets the part of the string that "strrichr()" returns for the "$needle":
   * the substring after (or before via "$beforeNeedle") its last occurrence.
   *
   * An empty string is returned if the input or the needle is empty, or if
   * "strrichr()" reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $needle || '' === $str) {
      return '';
    }

    $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return (false === $part) ? '' : $part;
  }
5605
5606
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The trailing characters; an empty string if $n is not positive
   *                or if "substr()" returns false.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n < 1) {
      return '';
    }

    // a negative start offset counts from the end of the string
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5625
5626
  /**
   * Limit the number of characters in a string.
   *
   * The add-on counts towards "$length": the input is cut to "$length" minus the length
   * of "$strAddOn" characters before the add-on is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>An empty string for an empty input or a non-positive "$length", the unchanged
   *                input if it already fits, otherwise the shortened string plus "$strAddOn".</p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // measure the add-on in the same encoding as the input, so both lengths use the same unit
    $keep = $length - (int)self::strlen($strAddOn, $encoding);

    // "substr()" may return false, make the concatenation explicit
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5652
5653
  /**
   * Limit the number of characters in a string without cutting through a word:
   * the string is cut at "$length" characters and the trailing, incomplete word is dropped.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>An empty string for an empty input or a non-positive "$length", the unchanged
   *                input if it already fits, otherwise the shortened string plus "$strAddOn".</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // the cut falls exactly behind a word: only the separating space has to go
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // cut hard, then throw away the last (partial) word
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // no complete word is left, fall back to a plain character cut
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
5694
5695
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared leading characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the prefix can never be longer than the shorter of both strings
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; ++$i) {
      $char = self::substr($str, $i, 1, $encoding);

      // strict comparison: characters are compared as strings, without type juggling
      if ($char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= (string)$char;
    }

    return $longestCommonPrefix;
  }
5721
5722
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if either input is empty
   *                or nothing is shared.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // extract the characters of $otherStr once, instead of once per table cell
    $otherChars = [];
    for ($j = 0; $j < $otherLength; ++$j) {
      $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // position in $str right behind the best match

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // every cell starts at 0, so only matching cells have to be written
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; ++$i) {
      // the character of $str does not change while walking over $otherStr
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; ++$j) {
        if ($strChar !== $otherChars[$j - 1]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // strictly greater: on ties the earlier match wins
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
5773
5774
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared trailing characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the suffix can never be longer than the shorter of both strings
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; ++$i) {
      // a negative offset addresses the $i-th character counted from the end
      $char = self::substr($str, -$i, 1, $encoding);

      // strict comparison: characters are compared as strings, without type juggling
      if ($char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      // walking backwards, so every new character is put in front
      $longestCommonSuffix = (string)$char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
5800
5801
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // "preg_match()" yields 1 on a match, 0 or false otherwise
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
5817
5818
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // negative offsets reach back at most "$length" characters,
    // non-negative offsets are zero-based and must stay below the length
    return $offset < 0
        ? $length >= \abs($offset)
        : $length > $offset;
  }
5841
5842
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // the bounds check has to measure the string in the same encoding that is used
    // to fetch the character, so delegate to the encoding-aware existence check
    if (!self::str_offset_exists($str, $index, $encoding)) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
5871
5872
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>Returns the padded string. An empty input yields an empty string; the input
   *                is returned unchanged if "$pad_length" is not positive, is smaller than the
   *                length of the input, or if "$pad_string" is empty.</p>
   *
   * @throws \InvalidArgumentException If "$pad_type" is neither an integer nor 'left', 'right' or 'both'.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // translate the string variants of the pad type into the "STR_PAD_*" constants
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // an empty pad string cannot fill anything and would cause a division by zero below
      return $str;
    }

    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        // repeat often enough to cover the gap, then trim to the exact size
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // both sides are cut from the same repeated string; the right side gets the odd character
        $half = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($half, 0, (int)\floor($diff / 2), $encoding);
        $post = (string)self::substr($half, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
5943
5944
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. The missing characters are split between both sides,
   * the right side receives the extra one if the amount is odd.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
5961
5962
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Alias for pad() with a $padType of 'left'.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure the input in the requested encoding, like "str_pad_both()" does
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
5977
5978
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Alias for pad() with a $padType of 'right'.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure the input in the requested encoding, like "str_pad_both()" does
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
5993
5994
  /**
   * Repeat a string.
   *
   * The input is passed through "filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6018
6019
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // plain delegation, "$count" is filled by reference by the native function
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6052
6053
  /**
   * Replaces $search with $replacement if it occurs at the beginning of the string.
   *
   * The search term is quoted and anchored with "^" before it is handed to "regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // the search term is taken literally, only the anchor is regex syntax
    $anchoredPattern = '^' . \preg_quote($search, '/');

    // double every backslash of the replacement before it is passed on
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6070
6071
  /**
   * Replaces $search with $replacement if it occurs at the ending of the string.
   *
   * The search term is quoted and anchored with "$" before it is handed to "regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // the search term is taken literally, only the anchor is regex syntax
    $anchoredPattern = \preg_quote($search, '/') . '$';

    // double every backslash of the replacement before it is passed on
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6088
6089
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that takes its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with its first match replaced, or the unchanged subject
   *                if "$search" was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $pos = self::strpos($subject, $search);

    if ($pos === false) {
      return $subject;
    }

    // swap exactly the matched span for the replacement
    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
6108
6109
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that takes its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with its last match replaced, or the unchanged subject
   *                if "$search" was not found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $pos = self::strrpos($subject, $search);

    if ($pos === false) {
      return $subject;
    }

    // swap exactly the matched span for the replacement
    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
6128
6129
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // shuffle the character positions, then collect the characters in that order
    $positions = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($positions);

    $chars = [];
    foreach ($positions as $position) {
      $chars[] = self::substr($str, $position, 1);
    }

    return \implode('', $chars);
  }
6151
6152
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * NOTE(review): with a negative $start and an explicit $end the computed length is
   * larger than the distance between both positions (e.g. -3 / -1) — confirm whether
   * negative start offsets are meant to be supported together with $end.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    if ($end === null) {
      // no end given: the whole length is enough to reach the end from any start
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // the end lies at or before the start, nothing to extract
      return '';
    } elseif ($end < 0) {
      // a negative end counts from the end of the string; the length has to be measured
      // in the same encoding that "substr()" uses below
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6181
6182
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes become underscores, every digit is wrapped in underscores, every "A-Z" letter is
   * lower-cased and prefixed with an underscore, whitespace runs become a single underscore
   * and repeated as well as leading / trailing underscores are removed.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // no "|" inside the character class: there it would be a literal pipe, not an alternation
        '/([\dA-Z])/u',
        static function ($matches) use ($encoding) {
          $match = $matches[1];

          // digits survive the int round trip unchanged and get an underscore on both sides
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6229
6230
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    // both sorts keep the keys and only change the order of the elements
    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
  }
6255
6256
  /**
   * Split a string into an array.
   *
   * The string is split along the "GRAPHEME_CLUSTER_RX" pattern and the resulting
   * pieces are joined into chunks of "$len" pieces each.
   *
   * @param string|string[] $str <p>A string, or an array of strings that are split one by one.</p>
   * @param int             $len <p>Number of pieces per chunk.</p>
   *
   * @return string[] <p>The chunks; an empty array for an empty string or a non-positive "$len".
   *                  For an array input every entry is replaced by its own list of chunks.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if ($len <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      // split every entry on its own, the keys are kept
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue every run of "$len" consecutive pieces back together
    return \array_map(
        static function (array $chunk) {
          return \implode('', $chunk);
        },
        \array_chunk($clusters, $len)
    );
  }
6304
6305
  /**
   * Splits the string at every occurrence of "$pattern" and returns the parts as
   * an array of strings. An optional integer $limit will truncate the results.
   *
   * The pattern is passed through "preg_quote()", so it is matched literally.
   *
   * @param string $str
   * @param string $pattern <p>The separator with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // an empty separator cannot split anything
    if ($pattern === '') {
      return [$str];
    }

    // "preg_split()" puts the unsplit remainder into the last slot when a limit is given,
    // so ask for one slot more and drop that remainder afterwards
    $limit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
    if ($parts === false) {
      return [];
    }

    if ($limit > 0 && \count($parts) === $limit) {
      \array_pop($parts);
    }

    return $parts;
  }
6348
6349
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>True if "$haystack" begins with "$needle"; always false if either of them is empty.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at byte offset 0 means the haystack begins with the needle
    return \strpos($haystack, $needle) === 0;
  }
6369
6370
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the substrings; always false
   *              for an empty string or an empty list.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    // the first hit decides
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6398
6399
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * INFO: Returns an empty string if $str or $separator is empty, or if the
   *       separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used for the
    // strlen() / substr() calls below, so that all offsets share one unit.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding), // skip the separator itself
        null,
        $encoding
    );
  }
6430
6431
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * INFO: Returns an empty string if $str or $separator is empty, or if the
   *       separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used for the
    // strlen() / substr() calls below, so that all offsets share one unit.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding), // skip the separator itself
        null,
        $encoding
    );
  }
6462
6463
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * INFO: Returns an empty string if $str or $separator is empty, or if the
   *       separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used for the
    // substr() call below, so that the offset and the cut share one unit.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // everything from the start up to (not including) the separator
    return (string)self::substr($str, 0, $offset, $encoding);
  }
6494
6495
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * INFO: Returns an empty string if $str or $separator is empty, or if the
   *       separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used for the
    // substr() call below, so that the offset and the cut share one unit.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // everything from the start up to (not including) the last separator
    return (string)self::substr($str, 0, $offset, $encoding);
  }
6526
6527
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * INFO: This is a wrapper around UTF8::strstr() that returns an empty string
   *       instead of <strong>false</strong>, and also for empty input.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $result = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // "false" is mapped to an empty string
    return false === $result ? '' : $result;
  }
6559
6560
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * INFO: This is a wrapper around UTF8::strrchr() that returns an empty string
   *       instead of <strong>false</strong>, and also for empty input.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $result = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // "false" is mapped to an empty string
    return false === $result ? '' : $result;
  }
6587
6588
  /**
   * Surrounds $str with the given substring.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6600
6601
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * INFO: A "word" is every run of non-whitespace characters. Words that are
   *       not on the ignore list are lower-cased first and then get an
   *       upper-case first character.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          $word = $match[0];

          // words from the ignore list are kept exactly as written (strict, case-sensitive lookup)
          if ($ignore && \in_array($word, $ignore, true)) {
            return $word;
          }

          // use the requested encoding for both steps, not only for the lower-casing
          return self::str_upper_first(self::strtolower($word, $encoding), $encoding);
        },
        $str
    );
  }
6630
6631
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * INFO: The entries of $ignore are merged into the list of "small words" and
   *       become part of the regular expressions below without any escaping.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // An input without any lower-case character (e.g. all caps) is lower-cased
    // first, using the requested encoding like every other step below.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in the pattern below tests for "…" although
    // its inline comment talks about a hyphen - confirm which one is intended.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6775
6776
  /**
   * Get a binary representation of a specific string.
   *
   * INFO: The bytes of the string are written as one big-endian number in
   *       base 2, without leading zeros ("0" for an empty or all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    // one hex digit => exactly four bits
    static $hexToBits = [
        '0' => '0000',
        '1' => '0001',
        '2' => '0010',
        '3' => '0011',
        '4' => '0100',
        '5' => '0101',
        '6' => '0110',
        '7' => '0111',
        '8' => '1000',
        '9' => '1001',
        'a' => '1010',
        'b' => '1011',
        'c' => '1100',
        'd' => '1101',
        'e' => '1110',
        'f' => '1111',
    ];

    $value = \unpack('H*', $str);
    $hex = ($value === false ? '' : (string)$value[1]);

    // Translate digit by digit instead of using base_convert() on the whole
    // number: base_convert() works with floating point numbers internally and
    // silently loses precision as soon as the input is longer than a few bytes.
    $bits = \ltrim(\strtr($hex, $hexToBits), '0');

    return $bits === '' ? '0' : $bits;
  }
6789
6790
  /**
   * Split a string into an array of lines.
   *
   * INFO: The string is split on every run of one or two "\r" / "\n" characters.
   *
   * NOTE(review): because of the "{1,2}" quantifier, two consecutive "\n" are
   *               consumed as a single separator, so a blank line between two
   *               lines does not show up as an empty entry - confirm that
   *               this is intended.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "nothing to split" (empty input or a failing regex)
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    // no filter requested => return the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
6825
6826
  /**
   * Convert a string into an array of words.
   *
   * INFO: The words are captured as delimiters of the split, so the unfiltered
   *       result alternates between "text between words" (even indexes) and
   *       "word" (odd indexes).
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "nothing to split" (empty input or a failing regex)
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // no filter requested => return the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    // make sure that every remaining entry is a string
    foreach ($filtered as $key => $value) {
      $filtered[$key] = (string)$value;
    }

    return $filtered;
  }
6870
6871
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown [optional] <p>Default: "?"</p>
   * @param bool   $strict  [optional] <p>Default: false</p>
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // all arguments are passed through unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6886
6887
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * INFO: A string that is not longer than $length is returned unchanged and
   *       without the $substring.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // the first parameter has no type declaration, so normalize it here
    $input = (string)$str;

    if ('' === $input) {
      return '';
    }

    // short enough, nothing to cut
    if ($length >= self::strlen($input, $encoding)) {
      return $input;
    }

    // reserve room for the substring that gets appended
    $keep = $length - self::strlen($substring, $encoding);

    // a failing substr() ("false") contributes an empty string
    return (string)self::substr($input, 0, $keep, $encoding) . $substring;
  }
6920
6921
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * INFO: A string that is not longer than $length is returned unchanged and
   *       without the $substring.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // short enough, nothing to cut
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // reserve room for the substring that gets appended
    $keep = $length - self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $keep, $encoding);
    if ($head === false) {
      return '';
    }

    // position of the first space at (or right before) the cut
    $nextSpace = self::strpos($str, ' ', $keep - 1, $encoding);

    // loose comparison on purpose: "false" is treated like position 0 here
    if ($nextSpace != $keep) {
      // the cut is inside of a word => go back to the last space before the cut
      $lastSpace = self::strrpos($head, ' ', 0, $encoding);

      if ($lastSpace !== false || $nextSpace !== false) {
        $head = self::substr($head, 0, (int)$lastSpace, $encoding);
      }
    }

    return $head . $substring;
  }
6964
6965
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * INFO: This is UTF8::str_delimit() with "_" as delimiter.
   *
   * @see UTF8::str_delimit()
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
6979
6980
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * INFO: This is UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    // only the first character still needs to be upper-cased
    return self::str_upper_first($camelized, $encoding);
  }
6994
6995
  /**
   * alias for "UTF8::ucfirst()"
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Default: false</p>
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // all arguments are passed through unchanged
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7010
7011
  /**
   * Counts number of words in the UTF-8 string.
   *
   * INFO: The string is split via UTF8::str_to_words(), so the words are the
   *       entries with an odd index in that result.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // plain list of words
      $words = [];
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      // words, keyed by their offset in the input
      $words = [];
      $position = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$position] = $parts[$index];
        // move on by the word itself plus the text that follows it
        $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    // every other value of $format: only the number of words
    return (int)(($partCount - 1) / 2);
  }
7054
7055
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed
   *       through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
7072
7073
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle [optional] <p>Default: false</p>
   * @param string $encoding      [optional] <p>Default: UTF-8</p>
   * @param bool   $cleanUtf8     [optional] <p>Default: false</p>
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // all arguments are passed through unchanged
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7090
7091
  /**
   * Case-sensitive string comparison.
   *
   * INFO: Both strings are normalized (NFD) before they are compared byte by
   *       byte. A string that can not be normalized is compared as it is.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    // identical bytes => no need to normalize anything
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() reports a failure (e.g. for invalid UTF-8) with a non-string
    // result, which \strcmp() rejects with a TypeError under strict_types.
    // Fall back to the original bytes in that case.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
7110
7111
  /**
   * Find length of initial segment not matching mask.
   *
   * INFO: <strong>null</strong> is returned for an empty $charList, for an
   *       empty (sub-)string and if the requested sub-string can not be
   *       extracted.
   *
   * @param string $str
   * @param string $charList <p>The chars that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start of the sub-string that is examined.</p>
   * @param int    $length   [optional] <p>Length of the sub-string that is examined.</p>
   *
   * @return int|null
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    // only work on the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    if ('' === $str) {
      return null;
    }

    // lazily capture everything in front of the first char from the mask
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // no char from the mask found => the whole string is the segment
    return self::strlen($str);
  }
7145
7146
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle [optional] <p>Default: false</p>
   * @param string $encoding      [optional] <p>Default: UTF-8</p>
   * @param bool   $cleanUtf8     [optional] <p>Default: false</p>
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // all arguments are passed through unchanged
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7163
7164
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    // every code point is converted by UTF8::chr() ...
    $chars = \array_map([self::class, 'chr'], $array);

    // ... and the results are joined in the given order
    return \implode('', $chars);
  }
7186
7187
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * INFO: The string is compared against every key of the internal BOM list.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the BOM strings (the keys) are needed, not their byte-lengths
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7206
7207
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // optional clean-up, before the tags are removed
    $input = ($cleanUtf8 === true ? self::clean($str) : $str);

    return \strip_tags($input, $allowable_tags);
  }
7239
7240
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string without any whitespace.
   */
  public static function strip_whitespace(string $str): string
  {
    // a failing regex ("null") results in an empty string
    return '' === $str ? '' : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
7257
7258
  /**
   * Find the position of the first occurrence of a string within another, case insensitive.
   *
   * The lookup is attempted with several backends in a fixed order (mbstring, intl,
   * native stripos() for pure ASCII input, the symfony polyfill, and finally an
   * upper-case comparison via UTF8::strpos()). A backend that reports "not found"
   * does not end the search; the next backend is tried.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found
   *                   (also returned when haystack or needle is empty).
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 1) mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // 2) intl: "grapheme_stripos()" can't handle other encodings or a negative offset
    $intlIsUsable = $encoding === 'UTF-8'
                    &&
                    $offset >= 0
                    &&
                    self::$SUPPORT['intl'] === true;
    if ($intlIsUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // 3) native function, only for pure ASCII input
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    // 4) symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    // 5) vanilla php: compare the upper-cased variants of both strings
    return self::strpos(
        self::strtoupper($haystack, $encoding, false, null, true),
        self::strtoupper($needle, $encoding, false, null, true),
        $offset,
        $encoding
    );
  }
7331
7332
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also returned when haystack or needle is empty on input).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if cleaning removed everything.
    // Compare against '' explicitly: a loose check would also treat the
    // perfectly valid needle "0" as empty and return the whole haystack.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // native function, only for pure ASCII input
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything before the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // self::strlen() may return false; substr() expects an int offset
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7410
7411
  /**
   * Get the string length in characters, not the byte-length!
   *
   * Several backends are tried in a fixed order (iconv for non UTF-8 encodings when
   * mbstring is missing, mbstring, iconv, intl, the symfony polyfill, native strlen()
   * for pure ASCII input and finally a regex based count).
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // for these two encodings the byte length is returned
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // non UTF-8 encoding without mbstring: warn if iconv is missing too, otherwise try iconv
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $matches);
    $characterCount = \count($matches[0]);

    // no match although the string contains at least one byte => report failure
    return ($characterCount === 0 && isset($str[0])) ? false : $characterCount;
  }
7521
7522
  /**
   * Get string length in byte.
   *
   * @param string $str <p>The input; it is cast to string before measuring.</p>
   *
   * @return int The number of bytes.
   */
  public static function strlen_in_byte($str): int
  {
    // init
    $str = (string)$str;

    // Make sure the support flags are populated before reading them, exactly
    // like the other public methods do; otherwise a direct call could read an
    // unset flag and take the wrong branch.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
7541
7542
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both inputs are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strnatcmp($foldedFirst, $foldedSecond);
  }
7559
7560
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    $foldedFirst = self::strtonatfold($str1);
    $foldedSecond = self::strtonatfold($str2);

    return \strnatcmp($foldedFirst, $foldedSecond);
  }
7579
7580
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both inputs are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strncmp($foldedFirst, $foldedSecond, $len);
  }
7598
7599
  /**
   * String comparison of the first n characters.
   *
   * Each input is cut to its first $len characters via UTF8::substr(); the
   * two prefixes are then compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a failed substr() (false) into an empty string
    $prefixFirst = (string)self::substr($str1, 0, $len);
    $prefixSecond = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefixFirst, $prefixSecond);
  }
7620
7621
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also returned when haystack or char_list is empty).
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // the pattern is built by self::rxClass() from the given characters
    $pattern = '/' . self::rxClass($char_list) . '/us';

    if (!\preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched text
    $startOfHit = (int)\strpos($haystack, $hit[0]);

    return \substr($haystack, $startOfHit);
  }
7643
7644
  /**
   * Find position of first occurrence of string in a string.
   *
   * Several backends are tried in a fixed order (native strpos() for "CP850",
   * iconv, mbstring, intl, native strpos() for pure ASCII input, the symfony
   * polyfill and finally a vanilla php implementation). A backend that reports
   * "not found" does not end the search; the next backend is tried.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found it returns false
   *                   (also returned when haystack or needle is empty).
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    // Translate a negative offset (counted from the end) into the equivalent
    // absolute offset first, so the position found in the sliced haystack can
    // be mapped back to a position in the full haystack. An offset reaching
    // before the start of the string is clamped to 0.
    if ($offset < 0) {
      $offset = \max(0, (int)self::strlen($haystack, $encoding) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle inside the sliced haystack
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos) {
      // convert the byte position into a character position; self::strlen() may return false
      return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
    }

    return $offset;
  }
7795
7796
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // native function, only for "CP850" / "ASCII" and only for the "needle to end" variant
    $nativeIsUsable = $before_needle === false
                      &&
                      ('CP850' === $encoding || 'ASCII' === $encoding);
    if ($nativeIsUsable) {
      return \strrchr($haystack, $needle);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // fallback via vanilla php: only the first character of the needle is searched for
    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
      return false;
    }
    $needle = (string)$firstNeedleChar;

    $lastPosition = self::strrpos($haystack, $needle, null, $encoding);
    if ($lastPosition === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPosition, $encoding)
        : self::substr($haystack, $lastPosition, null, $encoding);
  }
7877
7878
  /**
   * Reverses characters order in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Fast path: split the string into its characters in a single pass and
    // reverse the list, instead of extracting one character per loop
    // iteration (each extraction has to scan the string again).
    // preg_match_all() returns false for input it cannot process as UTF-8,
    // in which case the character-by-character loop below is used instead.
    if (\preg_match_all('/./us', $str, $characters) > 0) {
      return \implode('', \array_reverse($characters[0]));
    }

    // Fallback: walk backwards through the string, one character at a time.
    $reversed = '';
    $i = (int)self::strlen($str); // self::strlen() may return false
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
7899
7900
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // always fallback via symfony polyfill
    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
7936
7937
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Several backends are tried in a fixed order (mbstring, intl, the symfony
   * polyfill, native strripos() for pure ASCII input and finally an upper-case
   * comparison via UTF8::strrpos()).
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for; a non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false
   *                   (also returned when haystack or needle is empty).
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1) mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 2) intl: "grapheme_strripos()" can't handle other encodings or a negative offset
    $intlIsUsable = $encoding === 'UTF-8'
                    &&
                    $offset >= 0
                    &&
                    self::$SUPPORT['intl'] === true;
    if ($intlIsUsable) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // 3) symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 4) native function, only for pure ASCII input
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \strripos($haystack, $needle, $offset);
    }

    // 5) vanilla php: compare the upper-cased variants of both strings
    return self::strrpos(
        self::strtoupper($haystack, $encoding, false, null, true),
        self::strtoupper($needle, $encoding, false, null, true),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8022
8023
  /**
8024
   * Find position of last occurrence of a string in a string.
8025
   *
8026
   * @link http://php.net/manual/en/function.mb-strrpos.php
8027
   *
8028
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
8029
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
8030
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
8031
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
8032
   *                              the end of the string.
8033
   *                              </p>
8034
   * @param string     $encoding  [optional] <p>Set the charset.</p>
8035
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8036
   *
8037
   * @return int|false
8038
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
8039
   *                   string.<br>If needle is not found, it returns false.
8040
   */
8041 38
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // "mb_strrpos()" expects an integer offset: with strict types a null offset
      // must be converted to 0 ("search the whole string") before the call
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      // same null-to-0 conversion as for the native mbstring call above
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    // fallback for ascii only
    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return \strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // a positive offset drops the leading characters, a negative offset drops
    // the trailing characters; a null / zero offset keeps the haystack as it is
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position by measuring the prefix before the match;
    // the explicit casts make the null offset / false length handling visible
    return (int)$offset + (int)self::strlen(\substr($haystack, 0, $pos));
  }
8138
8139
  /**
8140
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
8141
   * mask.
8142
   *
8143
   * @param string $str    <p>The input string.</p>
8144
   * @param string $mask   <p>The mask of chars</p>
8145
   * @param int    $offset [optional]
8146
   * @param int    $length [optional]
8147
   *
8148
   * @return int
8149
   */
8150 10
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // restrict the input to the requested window first
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // keep the matches in their own variable instead of overwriting the subject string;
    // "preg_match()" returns 0 (no match) or false (error), both mean "nothing matched"
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // "self::strlen()" may return false, but this method is declared to return int
    return (int)self::strlen($matches[0]);
  }
8166
8167
  /**
8168
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
8169
   *
8170
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
8171
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
8172
   * @param bool   $before_needle  [optional] <p>
8173
   *                               If <b>TRUE</b>, strstr() returns the part of the
8174
   *                               haystack before the first occurrence of the needle (excluding the needle).
8175
   *                               </p>
8176
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
8177
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8178
   *
8179
   * @return string|false
8180
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found.
8181
   */
8182 5
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via regex: lazily capture everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // "self::strlen()" may return false, but "self::substr()" only accepts an integer offset
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
8238
8239
  /**
8240
   * Unicode transformation for case-less matching.
8241
   *
8242
   * @link http://unicode.org/reports/tr21/tr21-5.html
8243
   *
8244
   * @param string $str        <p>The input string.</p>
8245
   * @param bool   $full       [optional] <p>
8246
   *                           <b>true</b>, replace full case folding chars (default)<br>
8247
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
8248
   *                           </p>
8249
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
8250
   *
8251
   * @return string
8252
   */
8253 27
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // lookup tables: initialized on first use and reused by later calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    // step 1: the limited static mapping is always applied
    $folded = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2: the full case folding data is applied on request
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $folded = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    // step 3: lowercase the result
    return self::strtolower($folded);
  }
8285
8286
  /**
8287
   * Make a string lowercase.
8288
   *
8289
   * @link http://php.net/manual/en/function.mb-strtolower.php
8290
   *
8291
   * @param string      $str                   <p>The string being lowercased.</p>
8292
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
8293
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
8294
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8295
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8296
   *
8297
   * @return string String with all alphabetic characters converted to lowercase.
8298
   */
8299 175
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator instead
          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);

        // "transliterator_transliterate()" returns false on failure, which must not
        // leave this method (declared return type: string) ... use the fallback below instead
        if ($transliterated !== false) {
          return $transliterated;
        }

      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8348
8349
  /**
8350
   * Generic case sensitive transformation for collation matching.
8351
   *
8352
   * @param string $str <p>The input string</p>
8353
   *
8354
   * @return string
8355
   */
8356 6
  private static function strtonatfold(string $str): string
  {
    // decompose the characters (NFD), so that the marks can be removed separately
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // "Normalizer::normalize()" returns false on failure: keep the input as it is,
    // because false is neither a valid regex subject nor a valid (string) return value
    if ($decomposed === false) {
      return $str;
    }

    // remove all non-spacing marks (\p{Mn})
    $stripped = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    // "preg_replace()" returns null on error
    return $stripped ?? $decomposed;
  }
8361
8362
  /**
8363
   * Make a string uppercase.
8364
   *
8365
   * @link http://php.net/manual/en/function.mb-strtoupper.php
8366
   *
8367
   * @param string      $str                   <p>The string being uppercased.</p>
8368
   * @param string      $encoding              [optional] <p>Set the charset.</p>
8369
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
8370
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8371
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8372
   *
8373
   * @return string String with all alphabetic characters converted to uppercase.
8374
   */
8375 136
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator instead
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);

        // "transliterator_transliterate()" returns false on failure, which must not
        // leave this method (declared return type: string) ... use the fallback below instead
        if ($transliterated !== false) {
          return $transliterated;
        }

      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8424
8425
  /**
8426
   * Translate characters or replace sub-strings.
8427
   *
8428
   * @link  http://php.net/manual/en/function.strtr.php
8429
   *
8430
   * @param string          $str  <p>The string being translated.</p>
8431
   * @param string|string[] $from <p>The string replacing from.</p>
8432
   * @param string|string[] $to   <p>The string being translated to.</p>
8433
   *
8434
   * @return string
8435
   *                This function returns a copy of str, translating all occurrences of each character in from to the
8436
   *                corresponding character in to.
8437
   */
8438 2
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // identical search and replacement: nothing would change
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // an explicit replacement was given: build a "char => char" map
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // surplus characters of the longer side are ignored
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    // a plain string without a replacement is removed from the input,
    // an array is used as replacement pairs
    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
8469
8470
  /**
8471
   * Return the width of a string.
8472
   *
8473
   * @param string $str       <p>The input string.</p>
8474
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8475
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8476
   *
8477
   * @return int
8478
   */
8479 2
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // iconv and mbstring are not tolerant to invalid encoding
    // further, their behaviour is inconsistent with that of PHP's substr
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // always fallback via symfony polyfill
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
8494
8495
  /**
8496
   * Get part of a string.
8497
   *
8498
   * @link http://php.net/manual/en/function.mb-substr.php
8499
   *
8500
   * @param string $str       <p>The string being checked.</p>
8501
   * @param int    $offset    <p>The first position used in str.</p>
8502
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
8503
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8504
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8505
   *
8506
   * @return string|false
8507
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
8508
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
8509
   *                      characters long, <b>FALSE</b> will be returned.
8510
   */
8511 404
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // an empty input or a zero length can only produce an empty string
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // no offset and no length: the whole string is requested
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only determined when it is needed for the checks below
    $charCount = ($offset || $length === null) ? self::strlen($str, $encoding) : 0;

    // e.g.: invalid chars + mbstring not installed
    if ($charCount === false) {
      return false;
    }

    // the offset is equal to the character count and there is no length: nothing is left
    if (!$length && $offset === $charCount) {
      return '';
    }

    // impossible: the offset lies behind the end of the string
    if ($offset && $offset > $charCount) {
      return false;
    }

    // without an explicit length, the character count is used as upper limit
    $length = (int)($length ?? $charCount);

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" without function overloading: use the native byte-wise function
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return \substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: "grapheme_substr()" can't handle other encodings and can't handle negative offset
    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $graphemeResult = \grapheme_substr($str, $offset, $length);
      if ($graphemeResult !== false) {
        return $graphemeResult;
      }
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      $iconvResult = \iconv_substr($str, $offset, $length);
      if ($iconvResult !== false) {
        return $iconvResult;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($chars, $offset, $length));
  }
8629
8630
  /**
8631
   * Binary safe comparison of two strings from an offset, up to length characters.
8632
   *
8633
   * @param string   $str1               <p>The main string being compared.</p>
8634
   * @param string   $str2               <p>The secondary string being compared.</p>
8635
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
8636
   *                                     counting from the end of the string.</p>
8637
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
8638
   *                                     the length of the str compared to the length of main_str less the offset.</p>
8639
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
8640
   *                                     insensitive.</p>
8641
   *
8642
   * @return int
8643
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8644
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8645
   *             <strong>0</strong> if they are equal.
8646
   */
8647 2
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      // cut the requested window out of the main string
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // the secondary string is shortened to the length of that window;
      // "self::strlen()" may return false, but "self::substr()" only accepts int|null as length,
      // so an unknown length leaves the secondary string unshortened (null)
      $str1Length = self::strlen($str1);
      $str2Tmp = self::substr($str2, 0, $str1Length === false ? null : $str1Length);
      if ($str2Tmp === false) {
        $str2Tmp = '';
      }
      $str2 = (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
8673
8674
  /**
8675
   * Count the number of substring occurrences.
8676
   *
8677
   * @link  http://php.net/manual/en/function.substr-count.php
8678
   *
8679
   * @param string $haystack   <p>The string to search in.</p>
8680
   * @param string $needle     <p>The substring to search for.</p>
8681
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
8682
   * @param int    $length     [optional] <p>
8683
   *                           The maximum length after the specified offset to search for the
8684
   *                           substring. It outputs a warning if the offset plus the length is
8685
   *                           greater than the haystack length.
8686
   *                           </p>
8687
   * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
8688
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
8689
   *
8690
   * @return int|false This function returns an integer or false if there isn't a string.
8691
   */
8692 18
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // an offset and / or a length was given: only a part of the haystack is searched
    if ($offset || $length !== null) {

      if ($length === null) {
        $haystackLength = self::strlen($haystack);
        if ($haystackLength === false) {
          return false;
        }
        $length = (int)$haystackLength;
      }

      // output from "substr_count()" have changed in PHP 7.1
      $sumIsNotPositive = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
      if ($sumIsNotPositive && Bootup::is_php('7.1') === false) {
        return false;
      }

      $haystackPart = self::substr($haystack, $offset, $length, $encoding);
      $haystack = $haystackPart === false ? '' : (string)$haystackPart;
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match of the quoted needle is one occurrence
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return \count($occurrences);
  }
8767
8768
  /**
8769
   * Returns the number of occurrences of $substring in the given string.
8770
   * By default, the comparison is case-sensitive, but can be made insensitive
8771
   * by setting $caseSensitive to false.
8772
   *
8773
   * @param string $str           <p>The input string.</p>
8774
   * @param string $substring     <p>The substring to search for.</p>
8775
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
8776
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8777
   *
8778
   * @return int
8779
   */
8780 15
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $str || '' === $substring) {
      return 0;
    }

    // only a fallback to prevent BC in the api ...
    // a string in the third parameter is treated as the encoding; other non-boolean
    // values (e.g. int or null) must not overwrite the string typed $encoding
    if (\is_string($caseSensitive)) {
      $encoding = $caseSensitive;
    }

    // case insensitive: compare the uppercased versions of both strings
    if (!$caseSensitive) {
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    // "self::substr_count()" may return false, which is counted as 0
    return (int)self::substr_count($str, $substring, 0, null, $encoding);
  }
8798
8799
  /**
8800
   * Removes a prefix ($needle) from start of the string ($haystack), case insensitive.
8801
   *
8802
   * @param string $haystack <p>The string to search in.</p>
8803
   * @param string $needle   <p>The substring to search for.</p>
8804
   *
8805
   * @return string Return the sub-string.
8806
   */
8807 2
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // "self::strlen()" may return false, but "self::substr()" only accepts an integer offset;
      // without a usable length the prefix can't be cut off, so the haystack is returned as it is
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, $needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
8827
8828
  /**
   * Get part of a string, processed in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string, null means "up to the end
   *                         of the string".</p>
   *
   * @return string|false
   *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned (PHP &lt; 8.0, newer versions
   *                      return an empty string).</p>
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }

    // "substr()" accepts null as length only since PHP 8.0,
    // so leave the argument out instead of forwarding null
    if ($length === null) {
      return \substr($str, $offset);
    }

    return \substr($str, $offset, $length);
  }
8849
8850
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with the
   *                suffix.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    // no matching suffix: hand the input back untouched
    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    return $head === false ? '' : (string)$head;
  }
8878
8879
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with the
   *                prefix.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" can return false, but "self::substr()" needs an integer offset:
    // without a usable length there is nothing we can safely cut off
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $remainder = self::substr($haystack, (int)$needleLength);
    if ($remainder === false) {
      return '';
    }

    return (string)$remainder;
  }
8907
8908
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If offset is positive, the replacing will begin at the offset'th
   *                                          character of the string.
   *                                          <br><br>
   *                                          If offset is negative, the replacing will begin at the offset'th
   *                                          character from the end of the string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and positive, it represents the length of the
   *                                          portion of the string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of the string at
   *                                          which to stop replacing. If it is not given and $str is a string,
   *                                          the replacing ends at the end of the string. If it is not given and
   *                                          $str is an array, a length of 0 is used for every element. A length
   *                                          of zero inserts the replacement at the given offset.</p>
   *
   * @return string|string[] The result string is returned. If $str is an array then an array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // one replacement per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // one offset per input string, non-integer entries fall back to 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $count);
        foreach ($offset as $offsetKey => $offsetValue) {
          if ((int)$offsetValue !== $offsetValue) {
            $offset[$offsetKey] = 0;
          }
        }
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // one length per input string
      if (null === $length) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $count);
        foreach ($length as $lengthKey => $lengthValue) {
          if (null === $lengthValue) {
            $length[$lengthKey] = 0;
          } elseif ((int)$lengthValue !== $lengthValue) {
            // non-integer entries fall back to the number of input strings
            $length[$lengthKey] = $count;
          }
        }
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // handle every string on its own via a recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string can only take a single replacement: use the first one
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // pure ASCII input: the native byte based function is good enough
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters, so the offsets count characters instead of bytes
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $strLength = self::strlen($str);
      if ($strLength === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$strLength;
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
9014
9015
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with the
   *                suffix.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    // no matching suffix: hand the input back untouched
    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    return $head === false ? '' : (string)$head;
  }
9043
9044
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // the two well-known encodings are used as they are, everything else gets normalized
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: where the input equals its lower case form the upper case bytes remain, and vice versa
    // NOTE(review): this assumes that the lower and upper case variants have the same byte length as the
    // input - TODO confirm for characters whose case mapping changes the length
    return (string)($lower ^ $upper ^ $str);
  }
9071
9072
  /**
   * Checks whether a polyfill is standing in for a missing "mbstring" or "iconv" extension.
   *
   * This is the case if "mb_strlen()" exists although the "mbstring" extension is not loaded,
   * or if "iconv()" exists although the "iconv" extension is not loaded.
   *
   * @return bool
   *              <strong>true</strong> if at least one of the two extensions is polyfilled,
   *              <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen') === true;
    $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv') === true;

    return $mbstringPolyfilled || $iconvPolyfilled;
  }
9095
9096
  /**
   * Replaces every tab character of the string with a fixed number of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>The number of spaces that replace one tab. Default: 4</p>
   *
   * @return string <p>The string without tab characters.</p>
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9106
9107
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // the two well-known encodings are used as they are, everything else gets normalized
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9125
9126
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Forwarded as the "unknown"-character of "UTF8::to_ascii()".</p>
   * @param bool   $strict    [optional] <p>Forwarded as the "strict"-flag of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9143
9144
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9159
9160
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9175
9176
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9191
9192
  /**
   * Convert a string into ASCII.
   *
   * Every character without a known ASCII replacement is replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // per-request cache of the replacement tables, indexed by the high byte ("bank") of the code point
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure:
        // keep the input in that case and continue with the table based fallback below
        if ($transliterated !== false) {
          $str = (string)$transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // the code point of a previous character must never leak into this one
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF can never start a sequence
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be calculated for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the replacement table of this "bank" only once
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the character within its "bank"
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return \implode('', $chars);
  }
9353
9354
  /**
   * Converts a value into a boolean, based on its string representation.
   *
   * <ul>
   * <li>an empty string is false</li>
   * <li>"true", "1", "on" and "yes" are true, "false", "0", "off" and "no" are false (case insensitive)</li>
   * <li>other numeric strings are true if they are greater than zero</li>
   * <li>everything else is decided by the truthiness of the trimmed string</li>
   * </ul>
   *
   * @param mixed $str
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $key = \strtolower($str);

    if (\in_array($key, ['true', '1', 'on', 'yes'], true) === true) {
      return true;
    }

    if (\in_array($key, ['false', '0', 'off', 'no'], true) === true) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($str)) {
      $number = (float)$str + 0;

      return $number > 0;
    }

    return (bool)self::trim($str);
  }
9393
9394
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element, the keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // convert every element via a recursive call
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    return '' === $str ? '' : self::utf8_decode($str);
  }
9418
9419
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9432
9433
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      // convert every element via a recursive call, the keys are kept
      return \array_map(
          static function ($item) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';

    $pos = 0;
    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        if (($lead & "\xC0") === "\x80") {
          // a continuation byte without a lead byte - needs conversion
          $result .= self::to_utf8_convert_helper($lead);
        } else {
          // it doesn't need conversion
          $result .= $lead;
        }

        $pos++;

        continue;
      }

      // how many continuation bytes does the lead byte announce?
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // every announced byte must be a continuation byte, a missing byte counts as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $tailLength > 0;
      for ($tail = 1; $looksLikeUtf8 && $tail <= $tailLength; $tail++) {
        $next = $pos + $tail >= $byteCount ? "\x00" : $str[$pos + $tail];
        $looksLikeUtf8 = $next >= "\x80" && $next <= "\xBF";
        $sequence .= $next;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $result .= $sequence;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are checked on their own
        $result .= self::to_utf8_convert_helper($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
9542
9543
  /**
   * Converts a single byte into its UTF-8 representation.
   *
   * The byte is looked up in the "WINDOWS-1252 to UTF-8" table first; if it is not listed there,
   * it is encoded as a two-byte UTF-8 sequence (110000xx 10xxxxxx).
   *
   * @param int|string $input <p>The byte to convert, presumably always a one-byte string - TODO confirm the
   *                          "int" case against the callers.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // the upper two bits go into the lead byte; an integer shift instead of "/ 64" avoids
    // a float array key (implicit float to int truncation is deprecated since PHP 8.1)
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    // the lower six bits go into the continuation byte
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
9576
9577
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but
   * checking for that costs more time than it would save, so the regex path is
   * always taken.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. INF, or any empty
   *                      value other than the string "0", strips Unicode separators
   *                      (\pZ) and "other" characters (\pC) instead.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP but is a legitimate character list (the native
    // trim() accepts it), so it must not fall back to the default pattern.
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // quote the characters so they are safe inside the character class
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
9606
9607
  /**
   * Upper-case the first character of a string and leave the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Charset passed on to the substr / strtoupper helpers.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (true === $cleanUtf8) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character; the helper may report false
    $tail = self::substr($str, 1, null, $encoding);
    $tail = (false === $tail) ? '' : $tail;

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
9637
9638
  /**
   * Alias of "UTF8::ucfirst()": upper-cases the first character of the string.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Charset forwarded to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
9653
9654
  /**
   * Uppercase the first character of every word in the string.
   *
   * Pure ASCII input without exceptions or an extra charlist is delegated to the
   * native "ucwords()". Everything else is split via "UTF8::str_to_words()" and
   * each part is passed through "UTF8::ucfirst()" unless it is listed in
   * $exceptions.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are left unchanged.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: the string "0" is falsy but is valid input.
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // The native function is only usable when neither a charlist nor any
    // exception text was supplied (again: "0" counts as supplied).
    $usePhpDefaultFunctions = '' === $charlist . \implode('', $exceptions);

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only; a part consisting of "0" must be kept
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
9722
9723
  /**
   * Decode url-encoded text and html entities (optionally repeatedly) and fix
   * urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Repeat the decoding until the string no longer changes.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html entities so the
    // entity decoder below can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $before = $str;

      // one pass: to UTF-8 -> decode entities -> urldecode -> fix simple utf8
      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // stop after the first pass, or as soon as a pass changes nothing
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return $str;
  }
9771
9772
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The table has one entry per byte from 0x20 to 0xFF, keyed by the upper-case
   * percent-encoded byte ("%20" ... "%FF"), in ascending order:
   * - 0x20-0x7F map to the ASCII character itself,
   * - 0x80-0x9F map to the Windows-1252 character where one is assigned
   *   (e.g. "%80" => "€"), otherwise to the C1 control character with the
   *   same code point,
   * - 0xA0-0xFF map to the Latin-1 character with the same code point.
   *
   * NOTE(review): the former literal table mapped "%80" to a backtick and
   * contained several invisible entries ("%7F", "%81", "%8D", "%8F", "%90",
   * "%9D", "%A0", "%AD"); they are now generated explicitly — confirm that no
   * caller depends on the old values.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    static $TABLE_CACHE = null;

    if ($TABLE_CACHE !== null) {
      return $TABLE_CACHE;
    }

    // Windows-1252 bytes in 0x80-0x9F that have a printable character assigned.
    $win1252Specials = [
        0x80 => "\u{20AC}", // €
        0x82 => "\u{201A}", // ‚
        0x83 => "\u{0192}", // ƒ
        0x84 => "\u{201E}", // „
        0x85 => "\u{2026}", // …
        0x86 => "\u{2020}", // †
        0x87 => "\u{2021}", // ‡
        0x88 => "\u{02C6}", // ˆ
        0x89 => "\u{2030}", // ‰
        0x8A => "\u{0160}", // Š
        0x8B => "\u{2039}", // ‹
        0x8C => "\u{0152}", // Œ
        0x8E => "\u{017D}", // Ž
        0x91 => "\u{2018}", // ‘
        0x92 => "\u{2019}", // ’
        0x93 => "\u{201C}", // “
        0x94 => "\u{201D}", // ”
        0x95 => "\u{2022}", // •
        0x96 => "\u{2013}", // –
        0x97 => "\u{2014}", // —
        0x98 => "\u{02DC}", // ˜
        0x99 => "\u{2122}", // ™
        0x9A => "\u{0161}", // š
        0x9B => "\u{203A}", // ›
        0x9C => "\u{0153}", // œ
        0x9E => "\u{017E}", // ž
        0x9F => "\u{0178}", // Ÿ
    ];

    $table = [];
    for ($byte = 0x20; $byte <= 0xFF; ++$byte) {
      if ($byte < 0x80) {
        // plain ASCII
        $char = \chr($byte);
      } elseif (isset($win1252Specials[$byte])) {
        $char = $win1252Specials[$byte];
      } else {
        // code point == byte value: encode U+0080..U+00FF as two UTF-8 bytes
        $char = \chr(0xC0 | ($byte >> 6)) . \chr(0x80 | ($byte & 0x3F));
      }

      $table[\sprintf('%%%02X', $byte)] = $char;
    }

    $TABLE_CACHE = $table;

    return $TABLE_CACHE;
  }
10008
10009
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is first run through a "str_replace()" built from the
   * "win1252_to_utf8" data table and is then converted byte-wise in place:
   * - 2-byte sequences are decoded; code points below 256 become a single
   *   byte, everything else becomes "?",
   * - 3-byte and 4-byte sequences become a single "?",
   * - all other bytes are copied unchanged.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>If true, the (pre-processed) input is returned instead
   *                              whenever the decoded result is not shorter (in characters) than it.</p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): this replaces the table's keys with its values, i.e. in the
    // table's own win1252 -> utf8 direction — confirm that is intended for a decode.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';
    // $i is the read position, $j the write position; $j never overtakes $i,
    // so the string can be rewritten in place.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // the high nibble of the lead byte selects the sequence length
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // 2-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence at the end of the input: there is no
            // continuation byte to read, so emit the placeholder
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // 4-byte sequence: skip one extra byte, then fall through
          // intentionally to the 3-byte handling
          ++$i;
        case "\xE0":
          // 3-byte sequence: not representable, skip the continuation bytes
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes hold decoded output
    $return = self::substr_in_byte($str, 0, $j);
    if ($return === false) {
      $return = '';
    }

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
10093
10094
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "utf8_encode()", any result containing the byte 0xC2 is
   * additionally run through a "str_replace()" built from the keys and values
   * of the "win1252_to_utf8" data table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $str = \utf8_encode($str);

    // the polyfill may return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (false === $str) {
      return '';
    }

    // without a 0xC2 byte the table replacement is skipped entirely
    if (\strpos($str, "\xC2") === false) {
      return $str;
    }

    // search / replace lists are built once and reused by later calls
    static $searchCache = null;
    static $replaceCache = null;

    if (null === $searchCache) {

      if (null === self::$WIN1252_TO_UTF8) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
10134
10135
  /**
   * Fix utf8-win1252 chars; thin wrapper around "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10148
10149
  /**
   * Returns the class' whitespace table (self::$WHITESPACE_TABLE).
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return string[]
   *                 An array with all known whitespace characters as values and the type of whitespace as keys
   *                 as defined in above URL.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10164
10165
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Appended when the string was actually shortened.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // optional leading whitespace followed by 1..$limit "word + trailing whitespace" groups
    $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($regex, $str, $found);

    // no match at all: hand the input back untouched
    if (!isset($found[0])) {
      return $str;
    }

    // the match already spans the whole string: nothing was cut off
    $head = $found[0];
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
10196
10197
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is split into characters and mirrored by a single-byte mask
   * (" " for a space, "#" for an existing break, "?" for anything else). The
   * native "wordwrap()" runs on that mask, and the break positions it reports
   * are then replayed on the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string The given string wrapped at the specified column.
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    $mask = '';
    $chars = [];

    foreach (\explode($break, $str) as $lineIndex => $line) {

      // every line after the first was preceded by a break in the input
      if ($lineIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    $wrappedMask = \wordwrap($mask, $width, '#', $cut);

    $result = '';
    $charIndex = 0;  // position in $chars
    $maskIndex = -1; // position in $wrappedMask
    $breakPos = -1;  // position of the current "#" in $wrappedMask

    while (false !== ($breakPos = self::strpos($wrappedMask, '#', $breakPos + 1))) {

      // copy everything up to the break marker
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $result .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the marker stands for an existing break or for a replaced space:
      // drop that character, the break is appended below
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $result .= $break;
    }

    // whatever follows the last break
    return $result . \implode('', $chars);
  }
10267
10268
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * The input is split on "\r\n", "\r" or "\n", each line is wrapped on its own
   * with the multibyte-aware "UTF8::wordwrap()", and every wrapped line
   * (including the last one) is terminated with "\n".
   *
   * @param string $str
   * @param int    $limit <p>The column width used for every line.</p>
   *
   * @return string
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = (array)\preg_split('/\\r\\n|\\r|\\n/', $str);

    $wrapped = '';
    foreach ($lines as $line) {
      // a failed preg_split() is cast to [false] above
      if ($line === false) {
        continue;
      }

      // Use the class' own wordwrap(): the unqualified call resolved to the
      // global, byte-counting function, which wraps multibyte text too early.
      $wrapped .= self::wordwrap($line, $limit);
      $wrapped .= "\n";
    }

    return $wrapped;
  }
10292
10293
  /**
   * Returns the class' list of Unicode White Space characters (self::$WHITESPACE).
   *
   * @return string[] An array with numeric code point as key and White Space Character as value.
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
10302
10303
}
10304