Passed
Push — master ( becbd8...88376d )
by Lars
03:42
created

UTF8::strchr()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 5
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Bom => Byte-Length
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * Every key is a byte sequence and every value is the number of bytes of that key.
   * The 'as "WINDOWS-1252"' entries are the BOM bytes after they were (wrongly) read as
   * WINDOWS-1252 and stored as UTF-8 again, so each BOM byte >= 0x80 takes two bytes.
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // written with escapes on purpose: the literal characters are easily lost by editors / tools,
      // and an empty key would not describe a BOM at all
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Runs the environment support detection when an instance is created.
   */
  public function __construct()
  {
    // the detection itself is guarded, so repeated instantiation is cheap
    self::checkForSupport();
  }
212
213
  /**
   * Return the character at the specified position: $str[1] like functionality.
   *
   * An empty input string or a negative position yields an empty string.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string Single Multi-Byte character.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up: empty input or a position before the start
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Prepends the UTF-8 BOM to the string and returns the result.
   *
   * INFO: If string_has_bom() already reports a BOM, the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The output string that contains BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The target length is always "$left + $right + length of $str"; the padding
   * direction handed to str_pad() depends on which of the two amounts is non-zero.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding has a default now: a required parameter after optional ones is deprecated since PHP 8.0
    // and silently turns the "optional" parameters before it into required ones

    $length = $left + $right + self::strlen($str, $encoding);

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
   * Changes the case of all keys in an array.
   *
   * Any $case value other than CASE_UPPER is handled like CASE_LOWER.
   * If two keys become equal after the conversion, the later value wins.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default)</p>
   *
   * @return string[] An array with its keys lower or uppercased.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // unknown flags fall back to lower-casing, so only CASE_UPPER needs to be detected
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $name => $item) {
      $newName = $toUpper ? self::strtoupper($name) : self::strtolower($name);
      $converted[$newName] = $item;
    }

    return $converted;
  }
317
318
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * An empty string is also returned when $end directly follows $start.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // first position behind the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $found = self::substr($str, $from, $endPos - $from, $encoding);

    return $found === false ? '' : $found;
  }
356
357
  /**
   * Convert binary into an string.
   *
   * The binary digits are read as one big number, converted to hexadecimal
   * (without leading zeros) and packed via pack('H*'). The conversion is done
   * per 4-bit group, so inputs of any length are handled without the float
   * precision loss of base_convert().
   *
   * Characters other than "0" and "1" are ignored. Non-string input, an empty
   * string or an all-zero input yields an empty string.
   *
   * @param mixed $bin 1|0
   *
   * @return string
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // drop everything that is no binary digit and the leading zeros,
    // so the hex string below never starts with "0"
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to whole nibbles: each group of 4 bits maps to exactly one hex digit
    $bits = \str_repeat('0', (4 - \strlen($bits) % 4) % 4) . $bits;

    $hex = '';
    foreach (\str_split($bits, 4) as $nibble) {
      $hex .= \dechex((int)\bindec($nibble));
    }

    return \pack('H*', $hex);
  }
377
378
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
389
390
  /**
   * Forwards to UTF8::chr_map() with the same arguments.
   *
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
404
405
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * Implemented as a one-character substr() call; its result is cast to string.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
418
419
  /**
   * Returns an array consisting of the characters in the string.
   *
   * Delegates to UTF8::str_split() with a chunk length of 1.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    $chars = self::str_split($str, 1);

    return $chars;
  }
430
431
  /**
   * This method will auto-detect your server environment for UTF-8 support.
   *
   * The results are stored in self::$SUPPORT; the detection runs only once,
   * later calls return immediately.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    // set the flag first, so the probes below do not trigger the detection again
    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    // the transliterator ids are only available with a loaded "intl" extension
    $hasTransliteratorIds = self::$SUPPORT['intl'] === true
                            &&
                            \function_exists('transliterator_list_ids') === true;
    if ($hasTransliteratorIds) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
480
481
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding". Code points up to 127 are read
   * from the "chr" lookup table, higher ones are built via IntlChar::chr() when it is
   * available and otherwise assembled byte by byte from the lookup table.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // use "simple"-char only until "\x80"
    $isSimpleChar = $code_point <= 127;
    $viaIntlChar = !$isSimpleChar && self::$SUPPORT['intlChar'] === true;

    // the lookup table is needed by every path except the IntlChar one
    if (!$viaIntlChar && self::$CHR === null) {
      $chrTmp = self::getData('chr');
      if ($chrTmp) {
        self::$CHR = (array)$chrTmp;
      }
    }

    if ($isSimpleChar) {
      $chr = self::$CHR[$code_point];
    } elseif ($viaIntlChar) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
    } else {
      $cp = (int)$code_point;

      if ($cp <= 0x7F) {
        // 1 byte
        $chr = self::$CHR[$cp];
      } elseif ($cp <= 0x7FF) {
        // 2 bytes: lead byte + one continuation byte
        $chr = self::$CHR[($cp >> 6) + 0xC0] .
               self::$CHR[($cp & 0x3F) + 0x80];
      } elseif ($cp <= 0xFFFF) {
        // 3 bytes
        $chr = self::$CHR[($cp >> 12) + 0xE0] .
               self::$CHR[(($cp >> 6) & 0x3F) + 0x80] .
               self::$CHR[($cp & 0x3F) + 0x80];
      } else {
        // 4 bytes
        $chr = self::$CHR[($cp >> 18) + 0xF0] .
               self::$CHR[(($cp >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($cp >> 6) & 0x3F) + 0x80] .
               self::$CHR[($cp & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
580
581
  /**
   * Applies callback to all characters of a string.
   *
   * The string is split via UTF8::split() and the callback is run on every piece.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
595
596
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // with "mbstring.func_overload" the native strlen() is not used for counting bytes
    $byteCounter = '\strlen';
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteCounter = function ($data) {
        return UTF8::strlen_in_byte($data);
      };
    }

    return \array_map($byteCounter, $chars);
  }
631
632
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read: its marker bits are removed
   * and the lower 7 bits of each following byte are appended in 6-bit steps.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    if (($code & 0x80) === 0) {
      // 0xxxxxxx
      return $code;
    }

    // [mask, marker bits of the lead byte, total number of bytes]
    $leadByteTypes = [
        [0xe0, 0xc0, 2], // 110xxxxx
        [0xf0, 0xe0, 3], // 1110xxxx
        [0xf8, 0xf0, 4], // 11110xxx
    ];

    $length = 1;
    foreach ($leadByteTypes as $type) {
      if (($code & $type[0]) === $type[1]) {
        $length = $type[2];
        // remove the marker bits
        $code &= ~$type[1];

        break;
      }
    }

    for ($offset = 1; $offset < $length; ++$offset) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
670
671
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * An empty string is returned for an empty input; the entity "&#0;" is
   * handed to UTF8::ord() as an empty string.
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    $input = ($char === '&#0;') ? '' : $char;
    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
691
692
  /**
   * Forwards to UTF8::chr_to_decimal().
   *
   * alias for "UTF8::chr_to_decimal()"
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
705
706
  /**
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
   *
   * The chunks come from UTF8::split() and are joined with $end, so $end is
   * placed between the chunks.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
719
720
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The byte-level clean-up always runs; the optional steps are applied afterwards in
   * this order: diamond question mark, invisible characters, whitespace, MS Word chars, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word
   *                                              chars, e.g. the ellipsis character => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove the diamond
   *                                              question mark (replacement character).</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove
   *                                              invisible characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 keeps runs of well-formed sequences, the stray bytes of group 2 / 3 are dropped by '$1'
    $regx = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($regx, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
788
789
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * Runs UTF8::fix_simple_utf8() first and then UTF8::clean() with every option
   * enabled except the MS Word normalization.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    // remove all none UTF-8 symbols
    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (MS Word chars are kept)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark
        true   // remove invisible characters (e.g. "\0")
    );
  }
825
826
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is split via UTF8::split() first; every element is then passed to
   * UTF8::ord() and, for $u_style, additionally to UTF8::int_to_hex().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);
    if (\count($codePoints) === 0) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
870
871
  /**
   * Trims the string and replaces consecutive whitespace characters with a
   * single space. This includes tabs and newline characters, as well as
   * multibyte whitespace such as the thin space and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    // first squeeze every whitespace run into one space, then trim the result
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
886
887
  /**
   * Returns count of characters used in a string.
   *
   * The string is split into single characters via UTF8::split() and the
   * occurrences of each one are counted.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
900
901
  /**
   * Remove css media-queries.
   *
   * Every "@media ... { ... }" block matched by the pattern is replaced with an empty string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function css_stripe_media_queries(string $str): string
  {
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
916
917
  /**
   * Checks whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
927
928
  /**
   * Converts a int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
   * decoded via UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
939
940
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()", this function also tries to fix broken / double encoding,
   *        so it can be called on a string that is already UTF-8 without messing it up.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string. The input is returned unchanged if it (or $encoding) is empty,
   *                if no conversion applies, or if "mb_convert_encoding()" delivers no usable result.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if ('' === $str || '' === $encoding) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Without "$force", a conversion only happens if an encoding was detected and it differs from the target.
    if ($force !== true && ($encodingDetected === false || $encodingDetected === $encoding)) {
      return $str;
    }

    // The dedicated UTF-8 / ISO-8859-1 converters are used when forced or for these detected encodings.
    $useDedicatedConverter = $force === true
                             || $encodingDetected === 'UTF-8'
                             || $encodingDetected === 'WINDOWS-1252'
                             || $encodingDetected === 'ISO-8859-1';

    if ($encoding === 'UTF-8' && $useDedicatedConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useDedicatedConverter) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // "$encodingDetected" can only be false here if "$force" is true, in which case the target
    // encoding is used as source encoding anyway; the extra check keeps the argument a string.
    $fromEncoding = ($force === true || $encodingDetected === false) ? $encoding : $encodingDetected;

    // always fallback via symfony polyfill
    $strEncoded = \mb_convert_encoding($str, $encoding, $fromEncoding);

    // Strict check instead of a truthiness test: a converted result of "0" is a valid string and
    // must not be mistaken for a failed conversion.
    if (\is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1034
1035
  /**
   * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
   *
   * The text is cut at the nearest " " or "." found at or after the computed cut position. If no such
   * character exists there, the text is not cut on that side.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string. An empty string means: extract from the
   *                                         beginning of the text.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
    $strLength = (int)self::strlen($str, $encoding);

    if ($length === null) {
      $length = (int)\round($strLength / 2, 0);
    }

    // Returns the position of the nearest " " or "." at or after $offset, or false if neither exists.
    // A plain min() over both lookups is not usable here: min(5, false) is false, because an
    // int/bool comparison is done as booleans, so a single missing delimiter would hide the other one.
    $nextBreak = static function (int $offset) use ($str, $encoding) {
      $nearest = false;

      foreach ([' ', '.'] as $delimiter) {
        $found = self::strpos($str, $delimiter, $offset, $encoding);
        if ($found !== false && ($nearest === false || $found < $nearest)) {
          $nearest = $found;
        }
      }

      return $nearest;
    };

    // Strict comparison, so that a search for "0" is not treated as "no search".
    if ('' === $search) {

      $end = $length > 0 ? \min($length - 1, $strLength) : 0;

      $pos = $nextBreak($end);
      if ($pos === false || $pos <= 0) {
        return $str;
      }

      $strSub = self::substr($str, 0, $pos, $encoding);
      if ($strSub === false) {
        return '';
      }

      return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    // "not found" (false) is handled like a match at position 0: the extract then starts at the beginning.
    $wordOffset = $wordPos === false ? 0 : $wordPos;
    $halfSide = (int)($wordOffset - $length / 2 + (int)self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        // last " " or "." before the match; max() ignores a false operand and (int)false is 0
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordOffset > 0 && $halfSide > 0) {

      // The match is not at the beginning: skip text before it and (if possible) after it.
      $l = \min($pos_start + $length - 1, $strLength);

      $breakPos = $nextBreak($l);
      $pos_end = $breakPos === false ? 0 : $breakPos - $pos_start;

      if ($pos_end <= 0) {
        // no cut position behind the match: take everything up to the end of the string
        $strSub = self::substr($str, $pos_start, $strLength, $encoding);

        return $strSub === false ? '' : $replacerForSkippedText . \ltrim($strSub, $trimChars);
      }

      $strSub = self::substr($str, $pos_start, $pos_end, $encoding);

      return $strSub === false ? '' : $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
    }

    // The match is at the beginning (or missing): only the end of the text can be skipped.
    $l = \min($length - 1, $strLength);

    $pos_end = $nextBreak($l);
    if ($pos_end === false || $pos_end <= 0) {
      return $str;
    }

    $strSub = self::substr($str, 0, $pos_end, $encoding);

    return $strSub === false ? '' : \rtrim($strSub, $trimChars) . $replacerForSkippedText;
  }
1156
1157
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read. The value is passed through
   *                                        "filter_var(..., FILTER_SANITIZE_STRING)" before it is used.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Passed through to the native "file_get_contents()"
   *                                        (search the include path).
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If null and $timeout is set, a context
   *                                        with that "http" timeout is created.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts, null means 0.
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout, only used for the "http"
   *                                        context that is created when no $context was given.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);
    if (!\is_string($filename)) {
      // filter_var() signals a failure with false; with strict_types, handing that to the native
      // file_get_contents() would raise a TypeError instead of the documented "false on failure".
      return false;
    }

    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    $offset = $offset ?? 0;

    // the length argument is only passed on if the caller asked for a limit
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    if (
        self::is_binary($data, true) !== true
        ||
        self::is_utf16($data) !== false
        ||
        self::is_utf32($data) !== false
    ) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1248
1249
  /**
   * Reads a file and checks if its content starts with a BOM (Byte Order Mark).
   *
   * The check itself is done by "UTF8::string_has_bom()".
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $rawContent = \file_get_contents($file_path);

    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1268
1269
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized and every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to "\Normalizer", default: 4 (NFC).</p>
   * @param string $leading_combining  <p>Prefix for strings that would otherwise start with a combining
   *                                   mark (\p{Mn}); an empty string disables the prefix.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (\strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // plain ASCII needs no normalization at all
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the "isset($n[0])" check below succeeds for already normalized input
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // an unusable normalizer result means: re-encode the input as UTF-8 instead
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1332
1333
  /**
1334
   * "filter_input()"-wrapper that normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1335
   *
1336
   * Gets a specific external variable by name and optionally filters it
1337
   *
1338
   * @link  http://php.net/manual/en/function.filter-input.php
1339
   *
1340
   * @param int    $type          <p>
1341
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1342
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1343
   *                              <b>INPUT_ENV</b>.
1344
   *                              </p>
1345
   * @param string $variable_name <p>
1346
   *                              Name of a variable to get.
1347
   *                              </p>
1348
   * @param int    $filter        [optional] <p>
1349
   *                              The ID of the filter to apply. The
1350
   *                              manual page lists the available filters.
1351
   *                              </p>
1352
   * @param mixed  $options       [optional] <p>
1353
   *                              Associative array of options or bitwise disjunction of flags. If filter
1354
   *                              accepts options, flags can be provided in "flags" field of array.
1355
   *                              </p>
1356
   *
1357
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1358
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1359
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1360
   */
1361
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded if the caller really passed it, so that the native
    // function never receives the placeholder default (null) of this wrapper.
    $var = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1371
1372
  /**
1373
   * "filter_input_array()"-wrapper that normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1374
   *
1375
   * Gets external variables and optionally filters them
1376
   *
1377
   * @link  http://php.net/manual/en/function.filter-input-array.php
1378
   *
1379
   * @param int   $type       <p>
1380
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1381
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1382
   *                          <b>INPUT_ENV</b>.
1383
   *                          </p>
1384
   * @param mixed $definition [optional] <p>
1385
   *                          An array defining the arguments. A valid key is a string
1386
   *                          containing a variable name and a valid value is either a filter type, or an array
1387
   *                          optionally specifying the filter, flags and options. If the value is an
1388
   *                          array, valid keys are filter which specifies the
1389
   *                          filter type,
1390
   *                          flags which specifies any flags that apply to the
1391
   *                          filter, and options which specifies any options that
1392
   *                          apply to the filter. See the example below for a better understanding.
1393
   *                          </p>
1394
   *                          <p>
1395
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1396
   *                          input array are filtered by this filter.
1397
   *                          </p>
1398
   * @param bool  $add_empty  [optional] <p>
1399
   *                          Add missing keys as <b>NULL</b> to the return value.
1400
   *                          </p>
1401
   *
1402
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1403
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1404
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1405
   *               set and <b>NULL</b> if the filter fails.
1406
   */
1407
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only "$type" given, the native function is called without the optional arguments,
    // otherwise "$definition" and "$add_empty" are both forwarded.
    $a = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($a);
  }
1417
1418
  /**
1419
   * "filter_var()"-wrapper that normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1420
   *
1421
   * Filters a variable with a specified filter
1422
   *
1423
   * @link  http://php.net/manual/en/function.filter-var.php
1424
   *
1425
   * @param mixed $variable <p>
1426
   *                        Value to filter.
1427
   *                        </p>
1428
   * @param int   $filter   [optional] <p>
1429
   *                        The ID of the filter to apply. The
1430
   *                        manual page lists the available filters.
1431
   *                        </p>
1432
   * @param mixed $options  [optional] <p>
1433
   *                        Associative array of options or bitwise disjunction of flags. If filter
1434
   *                        accepts options, flags can be provided in "flags" field of array. For
1435
   *                        the "callback" filter, callable type should be passed. The
1436
   *                        callback must accept one argument, the value to be filtered, and return
1437
   *                        the value after filtering/sanitizing it.
1438
   *                        </p>
1439
   *                        <p>
1440
   *                        <code>
1441
   *                        // for filters that accept options, use this format
1442
   *                        $options = array(
1443
   *                        'options' => array(
1444
   *                        'default' => 3, // value to return if the filter fails
1445
   *                        // other options here
1446
   *                        'min_range' => 0
1447
   *                        ),
1448
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1449
   *                        );
1450
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1451
   *                        // for filter that only accept flags, you can pass them directly
1452
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1453
   *                        // for filter that only accept flags, you can also pass as an array
1454
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1455
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1456
   *                        // callback validate filter
1457
   *                        function foo($value)
1458
   *                        {
1459
   *                        // Expected format: Surname, GivenNames
1460
   *                        if (strpos($value, ", ") === false) return false;
1461
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1462
   *                        $empty = (empty($surname) || empty($givennames));
1463
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1464
   *                        if ($empty || $notstrings) {
1465
   *                        return false;
1466
   *                        } else {
1467
   *                        return $value;
1468
   *                        }
1469
   *                        }
1470
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1471
   *                        </code>
1472
   *                        </p>
1473
   *
1474
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1475
   */
1476 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" is only forwarded if the caller really passed it, so that the native
    // function never receives the placeholder default (null) of this wrapper.
    $variable = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($variable);
  }
1486
1487
  /**
1488
   * "filter_var_array()"-wrapper that normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1489
   *
1490
   * Gets multiple variables and optionally filters them
1491
   *
1492
   * @link  http://php.net/manual/en/function.filter-var-array.php
1493
   *
1494
   * @param array $data       <p>
1495
   *                          An array with string keys containing the data to filter.
1496
   *                          </p>
1497
   * @param mixed $definition [optional] <p>
1498
   *                          An array defining the arguments. A valid key is a string
1499
   *                          containing a variable name and a valid value is either a
1500
   *                          filter type, or an
1501
   *                          array optionally specifying the filter, flags and options.
1502
   *                          If the value is an array, valid keys are filter
1503
   *                          which specifies the filter type,
1504
   *                          flags which specifies any flags that apply to the
1505
   *                          filter, and options which specifies any options that
1506
   *                          apply to the filter. See the example below for a better understanding.
1507
   *                          </p>
1508
   *                          <p>
1509
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1510
   *                          input array are filtered by this filter.
1511
   *                          </p>
1512
   * @param bool  $add_empty  [optional] <p>
1513
   *                          Add missing keys as <b>NULL</b> to the return value.
1514
   *                          </p>
1515
   *
1516
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1517
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1518
   */
1519 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only "$data" given, the native function is called without the optional arguments,
    // otherwise "$definition" and "$add_empty" are both forwarded.
    $a = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($a);
  }
1529
1530
  /**
   * Tells whether the "finfo" class exists in the running PHP process.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function finfo_loaded(): bool
  {
    $isAvailable = \class_exists('finfo');

    return $isAvailable;
  }
1540
1541
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the start, values <= 0 give ''.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The leading characters, or '' if "UTF8::substr()" returned false.</p>
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    $leadingChars = self::substr($str, 0, $n, $encoding);

    return $leadingChars === false ? '' : $leadingChars;
  }
1563
1564
  /**
   * Checks that the string has no more characters than the given box size.
   *
   * The length is taken from "UTF8::strlen()" with its default encoding.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The size in number of chars to be checked against string.
   *
   * @return bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1576
1577
  /**
   * Swaps the German sharp s between its lowercase ("ß") and uppercase ("ẞ") form.
   *
   * @param string $str
   * @param bool   $useLower <p>Only strict <strong>true</strong> converts "ẞ" to "ß", any other value
   *                         converts "ß" to "ẞ" (the default).</p>
   *
   * @return string
   */
  private static function fixStrCaseHelper(string $str, $useLower = false): string
  {
    $from = 'ß';
    $to = 'ẞ';

    if ($useLower === true) {
      // reverse direction: uppercase => lowercase
      list($from, $to) = [$to, $from];
    }

    return \str_replace($from, $to, $str);
  }
1609
1610
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacement map is loaded once via "self::getData('utf8_fix')"; its keys and values are kept
   * in static caches for all following calls.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of the map replaced by its value; the unchanged input
   *                if no map could be loaded.</p>
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // NOTE(review): static analysis reports that getData() may return false; a non-array result
        // is replaced by an empty map, so the property only ever holds null or an array.
        $loadedMap = self::getData('utf8_fix');
        self::$BROKEN_UTF8_FIX = \is_array($loadedMap) ? $loadedMap : [];
      }

      // array_keys() / array_values() throw a TypeError for non-arrays under strict_types
      $fixMap = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixMap);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixMap);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1644
1645
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are preserved).
   * A string is passed through "UTF8::utf8_decode()" + "UTF8::to_utf8()" again
   * and again until a pass no longer changes it.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str)) {
      $fixedItems = [];
      foreach ($str as $itemKey => $itemValue) {
        $fixedItems[$itemKey] = self::fix_utf8($itemValue);
      }

      return $fixedItems;
    }

    $current = (string)$str;
    $previous = '';

    // repeat until the string is stable (an empty string never enters the loop)
    while ($previous !== $current) {
      $previous = $current;
      $decoded = self::utf8_decode($current, true);
      $current = self::to_utf8($decoded);
    }

    return $current;
  }
1675
1676
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is available, the direction code reported by
   * "IntlChar::charDirection()" is used first; when that code is neither in the
   * 'LTR' nor in the 'RTL' list below, or when "IntlChar" is not available, the
   * decimal code point of the character is looked up in hard-coded tables.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      $ltrCodes = [0, 11, 12, 20];
      $rtlCodes = [1, 13, 14, 15, 21];

      if (\in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }
    }

    $c = self::chr_to_decimal($char);

    // fast path: everything outside of 0x5be - 0x10b7f is handled as "LTR"
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    // single code points that are handled as "RTL"
    $rtlSingles = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    // inclusive [first, last] code point ranges that are handled as "RTL"
    $rtlRanges = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1798
1799
  /**
   * Get data from "/data/*.php" (relative to the directory of this file).
   *
   * The file is loaded via "require", so the result is whatever the data file returns.
   *
   * @param string $file <p>The file name without the directory and without the ".php" extension.</p>
   *
   * @return mixed|false Will return false on error (the file does not exist).
   */
  private static function getData(string $file)
  {
    $dataFilePath = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($dataFilePath)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $dataFilePath;
  }
1816
1817
  /**
   * Check for php-support.
   *
   * The support information is collected via "checkForSupport()" on first use.
   *
   * @param string|null $key
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key: hand out the complete list
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) keys yield null
    return self::$SUPPORT[$key] ?? null;
  }
1843
1844
  /**
   * Build a random string out of the given characters.
   *
   * Every character is picked via "random_int()"; if that throws an exception,
   * "mt_rand()" is used as fallback for that pick.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string An empty string, if $possibleChars has no characters.
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $charCount = self::strlen($possibleChars, $encoding);

    if ($charCount === 0) {
      return '';
    }

    $randomString = '';

    // add random chars
    for ($position = 0; $position < $length; $position++) {
      try {
        $randomIndex = \random_int(0, $charCount - 1);
      } catch (\Exception $e) {
        /** @noinspection RandomApiMigrationInspection */
        $randomIndex = \mt_rand(0, $charCount - 1);
      }

      $randomString .= self::substr($possibleChars, $randomIndex, 1, $encoding);
    }

    return $randomString;
  }
1877
1878
  /**
   * Build a unique identifier.
   *
   * The identifier is created via "uniqid()" (with "more entropy"), prefixed with
   * a seed built from "mt_rand()", the session-id, the remote- and server-address
   * (if available) and the given extra entropy.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $seed = \implode(
        '',
        [
            \mt_rand(),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ]
    );

    $identifier = \uniqid($seed, true);

    return $md5 ? \md5($identifier . $seed) : $identifier;
  }
1900
1901
  /**
   * Alias for "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1916
1917
  /**
   * Check if the string contains at least one lower case character.
   *
   * The check is delegated to "UTF8::str_matches_pattern()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $lowercasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowercasePattern);
  }
1928
1929
  /**
   * Check if the string contains at least one upper case character.
   *
   * The check is delegated to "UTF8::str_matches_pattern()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $uppercasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $uppercasePattern);
  }
1940
1941
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted via "hexdec()" and then passed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1952
1953
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted formats (case-insensitive): "U+xxxx", "\uxxxx" or the plain
   * hexadecimal digits, each with 4 to 6 hexadecimal digits. Everything else,
   * including input with non-hexadecimal letters, is handled as failure.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // only real hexadecimal digits are allowed: a wider "a-z" class would let
    // intval() silently stop at the first invalid char (e.g. "zzzz" => 0)
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1977
1978
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1993
1994
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" is available it does the conversion, otherwise
   * every character is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // with "keep ASCII" the conversion starts after the 7-bit range
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap consists of groups of exactly four values:
      // [start code, end code, offset, mask]. A trailing fifth element makes
      // the map invalid (PHP 8 throws a ValueError for it).
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without "mbstring": encode char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2040
2041
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Convert all HTML entities to their applicable characters. The decoding is
   * repeated until the string does not change anymore, so multiple encoded
   * entities (e.g. "&amp;lt;") are decoded completely.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
   *                         which document type to use. If null is given, "ENT_QUOTES | ENT_HTML5" is used.
   *                         <ul>
   *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                         </ul>
   *                         </p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The decoded string.
   */
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // nothing to decode without "&", or without both "&#" and ";"
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // make sure the support-info is initialized before reading it
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    do {
      $str_compare = $str;

      // decode decimal entities, but keep encoded quotes for html_entity_decode() + $flags
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            // always fallback via symfony polyfill
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the missing semicolon is added first; preg_replace() returns null on error, hence the cast)
      $str = \html_entity_decode(
          (string)\preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2181
2182
  /**
   * Create a escape html version of the string via "UTF8::htmlspecialchars()".
   *
   * The flags "ENT_QUOTES | ENT_SUBSTITUTE" are always used.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2198
2199
  /**
   * Remove empty html-tag.
   *
   * e.g.: <tag></tag>
   *
   * A tag is handled as empty, if there is nothing or only whitespace between
   * the opening- and the closing-tag.
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2216
2217
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * On top of the native "htmlentities()" this method replaces backslashes with
   * "&#92;" and passes the result through "UTF8::html_encode()" (ASCII chars are kept).
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>The input string.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type. The default is
   *                              ENT_COMPAT.
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
   *                              returning an empty string. Using this flag is discouraged as it may have
   *                              security implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
   *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                              returning an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
   *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
   *                              to ensure the well-formedness of XML documents with embedded external
   *                              content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Like <b>htmlspecialchars</b>,
   *                              <b>htmlentities</b> takes an optional third argument
   *                              <i>encoding</i> which defines encoding used in
   *                              conversion.
   *                              Although this argument is technically optional, you are highly
   *                              encouraged to specify the correct value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string.
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP doesn't replace a backslash with its html entity, since the backslash is
     * mostly used to escape characters when inserting into a database. That escaping
     * is the job of the database layer, so the backslash is replaced here with its
     * html entity equivalent.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    return self::html_encode($encoded, true, $encoding);
  }
2340
2341
  /**
2342
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2343
   *
2344
   * INFO: Take a look at "UTF8::htmlentities()"
2345
   *
2346
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2347
   *
2348
   * @param string $str           <p>
2349
   *                              The string being converted.
2350
   *                              </p>
2351
   * @param int    $flags         [optional] <p>
2352
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2353
   *                              invalid code unit sequences and the used document type. The default is
2354
   *                              ENT_COMPAT | ENT_HTML401.
2355
   *                              <table>
2356
   *                              Available <i>flags</i> constants
2357
   *                              <tr valign="top">
2358
   *                              <td>Constant Name</td>
2359
   *                              <td>Description</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_COMPAT</b></td>
2363
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2364
   *                              </tr>
2365
   *                              <tr valign="top">
2366
   *                              <td><b>ENT_QUOTES</b></td>
2367
   *                              <td>Will convert both double and single quotes.</td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_NOQUOTES</b></td>
2371
   *                              <td>Will leave both double and single quotes unconverted.</td>
2372
   *                              </tr>
2373
   *                              <tr valign="top">
2374
   *                              <td><b>ENT_IGNORE</b></td>
2375
   *                              <td>
2376
   *                              Silently discard invalid code unit sequences instead of returning
2377
   *                              an empty string. Using this flag is discouraged as it
2378
   *                              may have security implications.
2379
   *                              </td>
2380
   *                              </tr>
2381
   *                              <tr valign="top">
2382
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2383
   *                              <td>
2384
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2385
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2386
   *                              </td>
2387
   *                              </tr>
2388
   *                              <tr valign="top">
2389
   *                              <td><b>ENT_DISALLOWED</b></td>
2390
   *                              <td>
2391
   *                              Replace invalid code points for the given document type with a
2392
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2393
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2394
   *                              instance, to ensure the well-formedness of XML documents with
2395
   *                              embedded external content.
2396
   *                              </td>
2397
   *                              </tr>
2398
   *                              <tr valign="top">
2399
   *                              <td><b>ENT_HTML401</b></td>
2400
   *                              <td>
2401
   *                              Handle code as HTML 4.01.
2402
   *                              </td>
2403
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_XML1</b></td>
2406
   *                              <td>
2407
   *                              Handle code as XML 1.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              <tr valign="top">
2411
   *                              <td><b>ENT_XHTML</b></td>
2412
   *                              <td>
2413
   *                              Handle code as XHTML.
2414
   *                              </td>
2415
   *                              </tr>
2416
   *                              <tr valign="top">
2417
   *                              <td><b>ENT_HTML5</b></td>
2418
   *                              <td>
2419
   *                              Handle code as HTML 5.
2420
   *                              </td>
2421
   *                              </tr>
2422
   *                              </table>
2423
   *                              </p>
2424
   * @param string $encoding      [optional] <p>
2425
   *                              Defines encoding used in conversion.
2426
   *                              </p>
2427
   *                              <p>
2428
   *                              For the purposes of this function, the encodings
2429
   *                              ISO-8859-1, ISO-8859-15,
2430
   *                              UTF-8, cp866,
2431
   *                              cp1251, cp1252, and
2432
   *                              KOI8-R are effectively equivalent, provided the
2433
   *                              <i>string</i> itself is valid for the encoding, as
2434
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2435
   *                              the same positions in all of these encodings.
2436
   *                              </p>
2437
   * @param bool   $double_encode [optional] <p>
2438
   *                              When <i>double_encode</i> is turned off PHP will not
2439
   *                              encode existing html entities, the default is to convert everything.
2440
   *                              </p>
2441
   *
2442
   * @return string The converted string.
2443
   * </p>
2444
   * <p>
2445
   * If the input <i>string</i> contains an invalid code unit
2446
   * sequence within the given <i>encoding</i> an empty string
2447
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2448
   * <b>ENT_SUBSTITUTE</b> flags are set.
2449
   */
2450 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are handed to PHP untouched; every other label is
    // first run through self::normalize_encoding($encoding, 'UTF-8').
    $isPassThroughEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isPassThroughEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2458
2459
  /**
   * Tells whether the "iconv" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean.
    return \extension_loaded('iconv');
  }
2469
2470
  /**
   * Alias of "UTF8::decimal_to_chr()".
   *
   * The argument is forwarded unchanged and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2483
2484
  /**
   * Converts an integer into its hexadecimal code point notation, e.g. "U+00f1".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
   *
   * @return string The prefix followed by the hex digits, left-padded with "0" to at least four digits.
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Pad short values to four digits; longer values are kept unchanged.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2502
2503
  /**
   * Tells whether the "IntlChar" class is available.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    $isAvailable = \class_exists('IntlChar');

    return $isAvailable;
  }
2513
2514
  /**
   * Tells whether the "intl" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    $isLoaded = \extension_loaded('intl');

    return $isLoaded;
  }
2524
2525
  /**
   * Deprecated camelCase alias of "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str
   *
   * @return bool The unchanged result of UTF8::is_ascii().
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2540
2541
  /**
   * Deprecated camelCase alias of "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str
   *
   * @return bool The unchanged result of UTF8::is_base64().
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str): bool
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2556
2557
  /**
   * Deprecated camelCase alias of "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str
   * @param bool  $strict <p>Forwarded unchanged as the second argument.</p>
   *
   * @return bool The unchanged result of UTF8::is_binary().
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    $isBinary = self::is_binary($str, $strict);

    return $isBinary;
  }
2573
2574
  /**
   * Deprecated camelCase alias of "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr
   *
   * @return bool The unchanged result of UTF8::is_bom().
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2589
2590
  /**
   * Deprecated camelCase alias of "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str
   *
   * @return bool The unchanged result of UTF8::is_html().
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2605
2606
  /**
   * Deprecated camelCase alias of "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str
   *
   * @return bool The unchanged result of UTF8::is_json().
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    $isJson = self::is_json($str);

    return $isJson;
  }
2621
2622
  /**
   * Deprecated camelCase alias of "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param mixed $str
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
2640
2641
  /**
   * Deprecated camelCase alias of "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param mixed $str
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
2659
2660
  /**
   * Deprecated camelCase alias of "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str
   * @param bool   $strict <p>Forwarded unchanged as the second argument.</p>
   *
   * @return bool The unchanged result of UTF8::is_utf8().
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2676
2677
  /**
   * Checks whether the string consists of alphabetic chars only.
   *
   * The decision is delegated to str_matches_pattern() with the
   * POSIX class "[[:alpha:]]" anchored to the whole string.
   *
   * @param string $str
   *
   * @return bool
   *               Whether or not $str contains only alphabetic chars.
   */
  public static function is_alpha(string $str): bool
  {
    $alphaOnlyPattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnlyPattern);
  }
2689
2690
  /**
   * Checks whether the string consists of alphabetic and numeric chars only.
   *
   * The decision is delegated to str_matches_pattern() with the
   * POSIX class "[[:alnum:]]" anchored to the whole string.
   *
   * @param string $str
   *
   * @return bool
   *               Whether or not $str contains only alphanumeric chars.
   */
  public static function is_alphanumeric(string $str): bool
  {
    $alnumOnlyPattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnlyPattern);
  }
2702
2703
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * Accepted bytes are \x09, \x0A, \x0D, \x10, \x13 and the range \x20-\x7E;
   * any other byte makes the check fail. The empty string counts as ASCII.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    // NOTE(review): "\x10" and "\x13" (DLE, DC3) are on the allow-list; they look like
    // decimal 10 / 13 (LF, CR) written as hex - confirm before changing the pattern.
    return '' === $str
           ||
           !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
  }
2721
2722
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The check is a strict round trip: the input has to decode with
   * base64_decode() in strict mode and re-encode to exactly the same string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is base64 encoded.
   */
  public static function is_base64($str): bool
  {
    // Non-strings and the empty string are never treated as base64.
    if (\is_string($str) === false || '' === $str) {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare against false explicitly: a plain truthiness check would wrongly
    // reject valid input whose payload is the string "0" (e.g. "MA==").
    return $decoded !== false && \base64_encode($decoded) === $str;
  }
2743
2744
  /**
   * Guesses a file type from the first two bytes of the given string.
   *
   * @param string $str
   *
   * @return string[]
   *                  Array with the keys "ext" and "type": both are "" when fewer than
   *                  two bytes are available, both are "???" for an unknown signature,
   *                  otherwise "ext" is the extension and "type" is "binary".
   */
  private static function get_file_type($str)
  {
    $tooShort = ['ext' => '', 'type' => ''];

    if ('' === $str) {
      return $tooShort;
    }

    $head = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($head) !== 2) {
      return $tooShort;
    }

    // The decimal values of both bytes are concatenated, e.g. "%P" (37, 80) => 3780.
    $bytes = \unpack("C2chars", $head);
    $signature = (int)($bytes['chars1'] . $bytes['chars2']);

    // signature => file extension (every known signature is reported as "binary")
    $knownSignatures = [
        3780   => 'pdf',
        7790   => 'exe',
        7784   => 'midi',
        8075   => 'zip',
        8297   => 'rar',
        255216 => 'jpg',
        7173   => 'gif',
        6677   => 'bmp',
        13780  => 'png',
    ];

    if (isset($knownSignatures[$signature])) {
      return ['ext' => $knownSignatures[$signature], 'type' => 'binary'];
    }

    return ['ext' => '???', 'type' => '???'];
  }
2811
2812
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The checks run in this order and the first hit wins:
   * a string of only "0" / "1" chars, (strict mode only) a "binary" verdict from
   * ext-fileinfo, a known signature from get_file_type(), and finally a share of
   * NUL bytes above 0.256 of the byte length.
   *
   * @param mixed $input  <p>Is cast to string before it is checked.</p>
   * @param bool  $strict <p>Also ask ext-fileinfo for the MIME encoding.</p>
   *
   * @throws \RuntimeException if $strict is true and ext-fileinfo is reported as missing
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ('' === $input) {
      return false;
    }

    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($input);
      if ($detectedEncoding === 'binary') {
        return true;
      }

    }

    if (self::get_file_type($input)['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($input);
    if (!$byteLength) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Many NUL bytes relative to the total length are taken as a sign of binary data.
    $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);

    return ($nullByteCount / $byteLength) > 0.256;
  }
2869
2870
  /**
   * Check if the file is binary.
   *
   * Reads up to 512 bytes from the start of the file and runs them through
   * UTF8::is_binary() in strict mode. A file that cannot be opened or that
   * yields no data is reported as not binary.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $chunk = '';

    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
      $chunk = \fread($handle, 512);
      \fclose($handle);
    }

    return $chunk === '' ? false : self::is_binary($chunk, true);
  }
2894
2895
  /**
   * Checks whether the string consists of whitespace chars only.
   *
   * The decision is delegated to str_matches_pattern() with the
   * POSIX class "[[:space:]]" anchored to the whole string.
   *
   * @param string $str
   *
   * @return bool
   *               Whether or not $str contains only whitespace characters.
   */
  public static function is_blank(string $str): bool
  {
    $whitespaceOnlyPattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $whitespaceOnlyPattern);
  }
2907
2908
  /**
   * Checks if the given string is exactly one of the known "Byte Order Marks".
   *
   * The input is compared strictly against every key of self::$BOM.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2928
2929
  /**
   * Determine whether the given value is considered to be empty.
   *
   * A value is considered empty if it equals FALSE when converted to bool,
   * e.g. "", "0", 0, null, false or an empty array.
   *
   * @param mixed $str
   *
   * @return bool Whether or not $str is empty().
   */
  public static function is_empty($str): bool
  {
    // For an always-defined parameter, empty() is the same as a negated bool cast.
    return !$str;
  }
2943
2944
  /**
   * Checks whether the string consists of hexadecimal chars only.
   *
   * The decision is delegated to str_matches_pattern() with the
   * POSIX class "[[:xdigit:]]" anchored to the whole string.
   *
   * @param string $str
   *
   * @return bool
   *               Whether or not $str contains only hexadecimal chars.
   */
  public static function is_hexadecimal(string $str): bool
  {
    $hexOnlyPattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnlyPattern);
  }
2956
2957
  /**
   * Check if the string contains at least one html-tag, e.g. "<b>" or "</b>".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
   *              (always <strong>false</strong> for an empty string).
   */
  public static function is_html(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // Opening or closing tag with optional attributes and an optional trailing "/".
    $foundTags = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $foundTags);

    return \count($foundTags) > 0;
  }
2977
2978
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that decodes (via UTF8::json_decode()) to an object or an array
   * without a json error counts as json; an empty string never does.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException if ext-json is reported as missing
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Scalars and null are not accepted as json here.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3010
3011
  /**
   * Checks whether the string consists of lower case chars only.
   *
   * The decision is delegated to str_matches_pattern() with the
   * POSIX class "[[:lower:]]" anchored to the whole string.
   *
   * @param string $str
   *
   * @return bool
   *               Whether or not $str contains only lower case characters.
   */
  public static function is_lowercase(string $str): bool
  {
    $lowerOnlyPattern = '^[[:lower:]]*$';

    return (bool)self::str_matches_pattern($str, $lowerOnlyPattern);
  }
3024
3025
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The string is probed with unserialize(). Class instantiation is disabled
   * for that probe, so serialized objects are recognised without creating
   * instances of their classes.
   *
   * @param string $str
   *
   * @return bool Whether or not $str is serialized.
   */
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // unserialize() returns false for a serialized "false" as well as on failure,
    // so that one valid input has to be recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // "allowed_classes => false" turns objects into __PHP_Incomplete_Class, which
    // keeps this pure check from instantiating arbitrary classes from the payload.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3044
3045
  /**
   * Checks whether the string consists of upper case chars only.
   *
   * The decision is delegated to str_matches_pattern() with the
   * POSIX class "[[:upper:]]" anchored to the whole string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    $upperOnlyPattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $upperOnlyPattern);
  }
3058
3059
  /**
   * Check if the string is UTF-16.
   *
   * Only input that UTF8::is_binary() accepts is examined. The string (without BOM)
   * is converted from UTF-16LE and from UTF-16BE to UTF-8; each conversion that
   * survives a round trip collects a score, and the byte order with the higher
   * score wins. Equal scores mean "not UTF-16".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before it runs checkForSupport(), so apply the
    // same lazy check used elsewhere in this class before reading self::$SUPPORT.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Computed lazily, at most once, and shared by both byte orders.
    $strChars = [];
    $score = [];

    foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
      $score[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Only a conversion that is stable over a full round trip is scored.
      $backToUtf16 = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() searches the *values* of $strChars; if count_chars()
      // returns "char => count" this compares a char against counts - confirm intent.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
3131
3132
  /**
   * Check if the string is UTF-32.
   *
   * Only input that UTF8::is_binary() accepts is examined. The string (without BOM)
   * is converted from UTF-32LE and from UTF-32BE to UTF-8; each conversion that
   * survives a round trip collects a score, and the byte order with the higher
   * score wins. Equal scores mean "not UTF-32".
   *
   * @param mixed $str
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before it runs checkForSupport(), so apply the
    // same lazy check used elsewhere in this class before reading self::$SUPPORT.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Computed lazily, at most once, and shared by both byte orders.
    $strChars = [];
    $score = [];

    foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
      $score[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Only a conversion that is stable over a full round trip is scored.
      $backToUtf32 = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() searches the *values* of $strChars; if count_chars()
      // returns "char => count" this compares a char against counts - confirm intent.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3204
3205
  /**
3206
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3207
   *
3208
   * @see    http://hsivonen.iki.fi/php-utf8/
3209
   *
3210
   * @param string|string[] $str    <p>The string to be checked.</p>
3211
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3212
   *
3213
   * @return bool
3214
   */
3215 107
  public static function is_utf8($str, bool $strict = false): bool
3216
  {
3217 107
    if (\is_array($str) === true) {
3218 2
      foreach ($str as $k => $v) {
3219 2
        if (false === self::is_utf8($v, $strict)) {
3220 2
          return false;
3221
        }
3222
      }
3223
3224
      return true;
3225
    }
3226
3227 107
    if ('' === $str) {
3228 12
      return true;
3229
    }
3230
3231 103
    if ($strict === true) {
3232 2
      if (self::is_utf16($str) !== false) {
3233 2
        return false;
3234
      }
3235
3236
      if (self::is_utf32($str) !== false) {
3237
        return false;
3238
      }
3239
    }
3240
3241 103
    if (self::pcre_utf8_support() !== true) {
3242
3243
      // If even just the first character can be matched, when the /u
3244
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3245
      // invalid, nothing at all will match, even if the string contains
3246
      // some valid sequences
3247
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3248
    }
3249
3250 103
    $mState = 0; // cached expected number of octets after the current octet
3251
    // until the beginning of the next UTF8 character sequence
3252 103
    $mUcs4 = 0; // cached Unicode character
3253 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3254
3255 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3256
      self::checkForSupport();
3257
    }
3258
3259 103
    if (self::$ORD === null) {
3260
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3261
    }
3262
3263 103
    $len = self::strlen_in_byte((string)$str);
3264
    /** @noinspection ForeachInvariantsInspection */
3265 103
    for ($i = 0; $i < $len; $i++) {
3266 103
      $in = self::$ORD[$str[$i]];
3267 103
      if ($mState === 0) {
3268
        // When mState is zero we expect either a US-ASCII character or a
3269
        // multi-octet sequence.
3270 103
        if (0 === (0x80 & $in)) {
3271
          // US-ASCII, pass straight through.
3272 98
          $mBytes = 1;
3273 84
        } elseif (0xC0 === (0xE0 & $in)) {
3274
          // First octet of 2 octet sequence.
3275 75
          $mUcs4 = $in;
3276 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3277 75
          $mState = 1;
3278 75
          $mBytes = 2;
3279 58
        } elseif (0xE0 === (0xF0 & $in)) {
3280
          // First octet of 3 octet sequence.
3281 41
          $mUcs4 = $in;
3282 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3283 41
          $mState = 2;
3284 41
          $mBytes = 3;
3285 30
        } elseif (0xF0 === (0xF8 & $in)) {
3286
          // First octet of 4 octet sequence.
3287 19
          $mUcs4 = $in;
3288 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3289 19
          $mState = 3;
3290 19
          $mBytes = 4;
3291 13
        } elseif (0xF8 === (0xFC & $in)) {
3292
          /* First octet of 5 octet sequence.
3293
          *
3294
          * This is illegal because the encoded codepoint must be either
3295
          * (a) not the shortest form or
3296
          * (b) outside the Unicode range of 0-0x10FFFF.
3297
          * Rather than trying to resynchronize, we will carry on until the end
3298
          * of the sequence and let the later error handling code catch it.
3299
          */
3300 5
          $mUcs4 = $in;
3301 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3302 5
          $mState = 4;
3303 5
          $mBytes = 5;
3304 10
        } elseif (0xFC === (0xFE & $in)) {
3305
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3306 5
          $mUcs4 = $in;
3307 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3308 5
          $mState = 5;
3309 5
          $mBytes = 6;
3310
        } else {
3311
          // Current octet is neither in the US-ASCII range nor a legal first
3312
          // octet of a multi-octet sequence.
3313 103
          return false;
3314
        }
3315
      } else {
3316
        // When mState is non-zero, we expect a continuation of the multi-octet
3317
        // sequence
3318 84
        if (0x80 === (0xC0 & $in)) {
3319
          // Legal continuation.
3320 76
          $shift = ($mState - 1) * 6;
3321 76
          $tmp = $in;
3322 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3323 76
          $mUcs4 |= $tmp;
3324
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3325
          // Unicode code point to be output.
3326 76
          if (0 === --$mState) {
3327
            // Check for illegal sequences and code points.
3328
            //
3329
            // From Unicode 3.1, non-shortest form is illegal
3330
            if (
3331 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3332
                ||
3333 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3334
                ||
3335 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3336
                ||
3337 76
                (4 < $mBytes)
3338
                ||
3339
                // From Unicode 3.2, surrogate characters are illegal.
3340 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3341
                ||
3342
                // Code points outside the Unicode range are illegal.
3343 76
                ($mUcs4 > 0x10FFFF)
3344
            ) {
3345 8
              return false;
3346
            }
3347
            // initialize UTF8 cache
3348 76
            $mState = 0;
3349 76
            $mUcs4 = 0;
3350 76
            $mBytes = 1;
3351
          }
3352
        } else {
3353
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3354
          // Incomplete multi-octet sequence.
3355 36
          return false;
3356
        }
3357
      }
3358
    }
3359
3360 67
    return true;
3361
  }
3362
3363
  /**
3364
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3365
   * Decodes a JSON string
3366
   *
3367
   * @link http://php.net/manual/en/function.json-decode.php
3368
   *
3369
   * @param string $json    <p>
3370
   *                        The <i>json</i> string being decoded.
3371
   *                        </p>
3372
   *                        <p>
3373
   *                        This function only works with UTF-8 encoded strings.
3374
   *                        </p>
3375
   *                        <p>PHP implements a superset of
3376
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3377
   *                        only supports these values when they are nested inside an array or an object.
3378
   *                        </p>
3379
   * @param bool   $assoc   [optional] <p>
3380
   *                        When <b>TRUE</b>, returned objects will be converted into
3381
   *                        associative arrays.
3382
   *                        </p>
3383
   * @param int    $depth   [optional] <p>
3384
   *                        User specified recursion depth.
3385
   *                        </p>
3386
   * @param int    $options [optional] <p>
3387
   *                        Bitmask of JSON decode options. Currently only
3388
   *                        <b>JSON_BIGINT_AS_STRING</b>
3389
   *                        is supported (default is to cast large integers as floats)
3390
   *                        </p>
3391
   *
3392
   * @return mixed
3393
   *                The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3394
   *                null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3395
   *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3396
   *                is deeper than the recursion limit.
3397
   */
3398 23
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // Run the input through self::filter() before it is handed to the decoder.
    $filtered = self::filter($json);

    // Populate the support map lazily, on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (false === self::$SUPPORT['json']) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filtered, $assoc, $depth, $options);
  }
3415
3416
  /**
3417
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3418
   * Returns the JSON representation of a value.
3419
   *
3420
   * @link http://php.net/manual/en/function.json-encode.php
3421
   *
3422
   * @param mixed $value   <p>
3423
   *                       The <i>value</i> being encoded. Can be any type except
3424
   *                       a resource.
3425
   *                       </p>
3426
   *                       <p>
3427
   *                       All string data must be UTF-8 encoded.
3428
   *                       </p>
3429
   *                       <p>PHP implements a superset of
3430
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3431
   *                       only supports these values when they are nested inside an array or an object.
3432
   *                       </p>
3433
   * @param int   $options [optional] <p>
3434
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3435
   *                       <b>JSON_HEX_TAG</b>,
3436
   *                       <b>JSON_HEX_AMP</b>,
3437
   *                       <b>JSON_HEX_APOS</b>,
3438
   *                       <b>JSON_NUMERIC_CHECK</b>,
3439
   *                       <b>JSON_PRETTY_PRINT</b>,
3440
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3441
   *                       <b>JSON_FORCE_OBJECT</b>,
3442
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3443
   *                       constants is described on
3444
   *                       the JSON constants page.
3445
   *                       </p>
3446
   * @param int   $depth   [optional] <p>
3447
   *                       Set the maximum depth. Must be greater than zero.
3448
   *                       </p>
3449
   *
3450
   * @return string|false
3451
   *                      A JSON encoded <strong>string</strong> on success or<br>
3452
   *                      <strong>FALSE</strong> on failure.
3453
   */
3454 4
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Run the value through self::filter() before it is handed to the encoder.
    $filtered = self::filter($value);

    // Populate the support map lazily, on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (false === self::$SUPPORT['json']) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filtered, $options, $depth);
  }
3471
3472
  /**
3473
   * Checks whether JSON is available on the server.
3474
   *
3475
   * @return bool
3476
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
3477
   */
3478
  public static function json_loaded(): bool
  {
    // The JSON extension counts as available as soon as its decoder function exists.
    $decoderExists = \function_exists('json_decode');

    return $decoderExists;
  }
3482
3483
  /**
3484
   * Makes string's first char lowercase.
3485
   *
3486
   * @param string $str       <p>The input string</p>
3487
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
3488
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3489
   *
3490
   * @return string The resulting string.
3491
   */
3492 46
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; a failed substr() (false) becomes "".
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = ($tail === false) ? '' : $tail;

    // The first character only; the cast turns a failed substr() into "".
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3507
3508
  /**
3509
   * alias for "UTF8::lcfirst()"
3510
   *
3511
   * @see UTF8::lcfirst()
3512
   *
3513
   * @param string $str
3514
   * @param string $encoding
3515
   * @param bool   $cleanUtf8
3516
   *
3517
   * @return string
3518
   */
3519 2
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowered;
  }
3523
3524
  /**
3525
   * Lowercase for all words in the string.
3526
   *
3527
   * @param string   $str        <p>The input string.</p>
3528
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3529
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
3530
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3531
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3532
   *
3533
   * @return string
3534
   */
3535 2
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" would also reject the
    // perfectly valid input "0".
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = $exceptions !== [];
    $newWords = [];

    foreach ($words as $word) {
      // Skip only genuinely empty tokens; a "0" token must survive, otherwise
      // it would silently vanish from the re-assembled string.
      if ('' === $word) {
        continue;
      }

      // Words listed in $exceptions are kept exactly as they are.
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The tokens are glued back together without any separator.
    return \implode('', $newWords);
  }
3573
3574
  /**
3575
   * alias for "UTF8::lcfirst()"
3576
   *
3577
   * @see UTF8::lcfirst()
3578
   *
3579
   * @param string $str
3580
   * @param string $encoding
3581
   * @param bool   $cleanUtf8
3582
   *
3583
   * @return string
3584
   */
3585 5
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowered;
  }
3589
3590
  /**
3591
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3592
   *
3593
   * @param string $str   <p>The string to be trimmed</p>
3594
   * @param mixed  $chars <p>Optional characters to be stripped</p>
3595
   *
3596
   * @return string The string with unwanted characters stripped from the left.
3597
   */
3598 22
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // INF (the default) or any falsy value selects the built-in pattern.
    // The string "0" is falsy in PHP but is a legitimate char list
    // (e.g. stripping leading zeros), so it is excluded from that shortcut.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      // Unicode separators (\pZ) and "other"/control characters (\pC).
      $pattern = '^[\pZ\pC]+';
    } else {
      // Quote the caller's characters so they are taken literally inside the class.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[{$chars}]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3614
3615
  /**
3616
   * Returns the UTF-8 character with the maximum code point in the given data.
3617
   *
3618
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3619
   *
3620
   * @return string|null The character with the highest code point than others, returns null on failure or empty input.
3621
   */
3622 2
  public static function max($arg)
  {
    // An array of strings is concatenated into a single string first.
    $subject = \is_array($arg) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($subject, false);

    // No code points means there is no maximum to report.
    if (0 === \count($codepoints)) {
      return null;
    }

    // Convert the highest code point back into its character.
    return self::chr(\max($codepoints));
  }
3637
3638
  /**
3639
   * Calculates and returns the maximum number of bytes taken by any
3640
   * UTF-8 encoded character in the given string.
3641
   *
3642
   * @param string $str <p>The original Unicode string.</p>
3643
   *
3644
   * @return int Max byte lengths of the given chars.
3645
   */
3646 2
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    // An empty size list means there is no character to measure.
    if (\count($sizes) <= 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
3655
3656
  /**
3657
   * Checks whether mbstring is available on the server.
3658
   *
3659
   * @return bool
3660
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
3661
   */
3662 27
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    // Side effect: mbstring's internal encoding is switched to UTF-8.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3672
3673
  /**
3674
   * Checks whether mbstring "overloaded" is active on the server.
3675
   *
3676
   * @return bool
3677
   */
3678
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // Without the constant there is no overload flag to test against.
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3690
3691
  /**
3692
   * Returns the UTF-8 character with the minimum code point in the given data.
3693
   *
3694
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3695
   *
3696
   * @return string|null The character with the lowest code point than others, returns null on failure or empty input.
3697
   */
3698 2
  public static function min($arg)
  {
    // An array of strings is concatenated into a single string first.
    $subject = \is_array($arg) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($subject, false);

    // No code points means there is no minimum to report.
    if (0 === \count($codepoints)) {
      return null;
    }

    // Convert the lowest code point back into its character.
    return self::chr(\min($codepoints));
  }
3713
3714
  /**
3715
   * alias for "UTF8::normalize_encoding()"
3716
   *
3717
   * @see        UTF8::normalize_encoding()
3718
   *
3719
   * @param mixed $encoding
3720
   * @param mixed $fallback
3721
   *
3722
   * @return mixed
3723
   *
3724
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3725
   */
3726 2
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    // Deprecated alias: both arguments are forwarded unchanged.
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3730
3731
  /**
3732
   * Normalize the encoding-"name" input.
3733
   *
3734
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3735
   * @param mixed $fallback <p>e.g.: UTF-8</p>
3736
   *
3737
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
3738
   */
3739 340
  public static function normalize_encoding($encoding, $fallback = '')
  {
    // Per-process cache: raw input name => normalized name.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    // '' and '0' are both falsy and therefore covered by "!$encoding";
    // '1' is what a boolean true turns into for non "strict_types" callers.
    if (!$encoding || $encoding === '1') {
      return $fallback;
    }

    // Fast path for the most common name.
    if ('UTF-8' === $encoding || 'UTF8' === $encoding) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // Static analysis reports that getData() can also deliver false, so the
    // list is only consulted when it really is an array.
    if (
        \is_array(self::$ENCODINGS)
        &&
        \in_array($encoding, self::$ENCODINGS, true)
    ) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);

    // Lookup key: keep ASCII letters and digits only, so that separators and
    // whitespace ("ISO 8859-1", "utf_16", ...) do not prevent a match.
    // The cast guards against preg_replace() returning null on error.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Known alias: use the canonical name. Otherwise the upper-cased input
    // is returned as it is.
    if (isset($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the name the caller actually passed in.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3856
3857
  /**
3858
   * Standardize line ending to unix-like.
3859
   *
3860
   * @param string $str
3861
   *
3862
   * @return string
3863
   */
3864 5
  public static function normalize_line_ending(string $str): string
  {
    // strtr() tries the longest key first, so a "\r\n" pair collapses into a
    // single "\n" before a lone "\r" is considered.
    $unixStyle = \strtr($str, ["\r\n" => "\n", "\r" => "\n"]);

    return $unixStyle;
  }
3868
3869
  /**
3870
   * Normalize some MS Word special characters.
3871
   *
3872
   * @param string $str <p>The string to be normalized.</p>
3873
   *
3874
   * @return string
3875
   */
3876 39
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replace lists, built once per process from self::$UTF8_MSWORD.
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      // Static analysis reports that getData() can also deliver false. Without
      // a replacement table there is nothing to normalize, so the input is
      // returned untouched instead of failing inside array_keys().
      if (!\is_array(self::$UTF8_MSWORD)) {
        return $str;
      }

      $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    // Each key of the table is replaced by its value.
    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3897
3898
  /**
   * Normalize the whitespace.
   *
   * Every value of self::$WHITESPACE_TABLE is replaced by a plain space and
   * (unless disabled) every value of self::$BIDI_UNI_CODE_CONTROLS_TABLE is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ('' === $str) {
      return '';
    }

    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // one cached search-list per "keep non-breaking-space" variant (0 or 1)
    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi control chars are removed before the whitespace is replaced
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3940
3941
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per call-argument combination (input + encoding).
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input (e.g. an empty string).
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache-key is built from the input as it was passed in
    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // fallback: decode the (at most) four leading bytes by hand,
    // unpack() returns them as a 1-based array
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $code && isset($bytes[4])) {
      // four byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif (0xE0 <= $code && isset($bytes[3])) {
      // three byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif (0xC0 <= $code && isset($bytes[2])) {
      // two byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($code - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
      // single byte (or nothing at all)
      $codePoint = $code;
    }

    return $CHAR_CACHE[$cacheKey] = $codePoint;
  }
4010
4011
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never (re-)places variables in the current scope,
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // always fallback via symfony polyfill
    $parsed = \mb_parse_str($input, $result);

    return $parsed !== false && !empty($result);
  }
4036
4037
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier against an empty string,
   * errors are silenced on purpose.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @\preg_match('//u', '');

    return (bool)$matchResult;
  }
4049
4050
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] <p>An empty array is returned if one of the boundaries is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // resolves a boundary into a code point: decimal digits, hexadecimal digits or a character
    $toCodePoint = static function ($boundary): int {
      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    $codePoints = \range($start, $end);

    return \array_map([self::class, 'chr'], $codePoints);
  }
4106
4107
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal html entities first
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // decode until the string does not change anymore (or only once, if $multi_decode is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4155
4156
  /**
   * Filter a list of strings and return the remaining values as a re-indexed list.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int   $removeShortValues <p>Drop values with a length less than or equal to this value,
   *                                 null disables the check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $keep = static function ($str) use ($removeEmptyValues, $removeShortValues): bool {
      // the length check comes first, same as the emptiness check it is optional
      if ($removeShortValues !== null && self::strlen($str) <= $removeShortValues) {
        return false;
      }

      if ($removeEmptyValues === true && \trim($str) === '') {
        return false;
      }

      return true;
    };

    // array_filter() preserves the keys, so the result needs to be re-indexed
    return \array_values(\array_filter($strings, $keep));
  }
4190
4191
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The "u" (UTF-8) modifier is always added to the given options.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option-string "msr" is mapped onto the modifiers "ms"
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4221
4222
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4237
4238
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM is checked once, in table order, against the
   * start of the (possibly already shortened) string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at the very beginning of the string is of interest
      if (0 !== self::strpos_in_byte($str, $bomString, 0)) {
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
      if ($stripped === false) {
        return '';
      }

      $remainingBytes -= $bomByteLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4267
4268
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "   ") is collapsed into a single $what (" ").
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle has no duplicates
        if ($item === '') {
          continue;
        }

        // The needle is inserted via callback and not as replacement-string:
        // in a replacement-string sequences like "$0", "$2" or "\0" would be
        // interpreted as back-references, so that such needles were replaced wrongly.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4291
4292
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
   *                              Default: '' (strip all tags)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $stripped = \strip_tags($str, $allowableTags);

    return $stripped;
  }
4306
4307
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // Modifiers: "i" = case-insensitive ("<BR>"), "s" = dot matches newlines, "U" = ungreedy.
    //
    // INFO: the pattern must not start with a "/" in front of "\r\n", otherwise
    //       a literal slash before a line break would be removed as well and a
    //       plain "\r\n" would be handled as two separate breaks.
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4319
4320
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form of the characters, e.g. "%00".</p>
   * @param string $replacement <p>The string which is inserted for every removed character (sequence).</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // INFO: the hex digits of an url encoded char can be upper- or lower-case
      //       ("%0B" vs. "%0b"), so these patterns need to be case-insensitive
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore: removing a sequence can
    // create a new one, e.g. "%%0000" => "%00" => ""
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4353
4354
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // everything after the prefix is kept (length "null" = until the end of the string)
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4377
4378
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (self::str_ends_with($str, $substring)) {

      // INFO: the lengths are calculated with $encoding, so the same encoding
      //       must be used for the cut itself (same as in "UTF8::remove_left()"),
      //       otherwise the cut position can be wrong for non UTF-8 encodings
      return (string)self::substr(
          $str,
          0,
          (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding),
          $encoding
      );
    }

    return $str;
  }
4400
4401
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to "UTF8::str_replace()" (case-sensitive) or "UTF8::str_ireplace()" (case-insensitive).
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4419
4420
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * Delegates to "UTF8::str_replace()" (case-sensitive) or "UTF8::str_ireplace()" (case-insensitive).
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4438
4439
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is passed as "none" to "mb_substitute_character()"
      $replacementCharHelper = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      //
      // the current substitute character is saved and restored after the conversion
      $save = \mb_substitute_character();
      \mb_substitute_character($replacementCharHelper);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    $diamondQuestionMarks = [
        "\xEF\xBF\xBD",
        '�',
    ];

    return str_replace($diamondQuestionMarks, $replacementChar, $str);
  }
4495
4496
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped, if they are not given (or empty)
   *                      the characters of the unicode categories "Z" and "C" are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // INFO: the string "0" is falsy in php, but it is a valid char-list
    //       (e.g. to strip the zeros from "100"), so it needs an extra check
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4520
4521
  /**
   * Build a regex (character class or alternation) out of the characters of $s.
   *
   * The result is cached per "$s . $class" combination.
   *
   * @param string $s     <p>The characters to convert, they are split via "UTF8::str_split()".</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>"[...]" if everything fits into one character class, "(?:...|...)" otherwise.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 collects the content of the character class,
    // every further element is a separate alternative
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a dash is put at the front of the class
        $parts[0] = '-' . $parts[0];

        continue;
      }

      if (!isset($char[2])) {
        // less than three bytes
        $parts[0] .= \preg_quote($char, '/');

        continue;
      }

      if (1 === self::strlen($char)) {
        $parts[0] .= $char;

        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === \count($parts))
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4569
4570
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of self::$SUPPORT is printed as "key - value" inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $key => $value) {
      $output .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4585
4586
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $normalizedEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return '&#' . self::ord($char, $normalizedEncoding) . ';';
  }
4615
4616
  /**
   * Replace every run of $tabLength spaces by one tab.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength <p>Number of spaces which are replaced by one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4626
4627
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string|int $str       <p>The string to split into array.</p>
   * @param int        $length    [optional] <p>Max character length of each array element.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk through the bytes and collect the UTF-8 sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteLength = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteLength; $i++) {
        $leadByte = $str[$i];

        // 0xxxxxxx: single byte (ASCII)
        if (($leadByte & "\x80") === "\x00") {
          $chars[] = $leadByte;

          continue;
        }

        // the lead byte defines the length of the sequence
        if (($leadByte & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($leadByte & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($leadByte & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          // not a valid lead byte: skip this byte
          continue;
        }

        // truncated sequence: skip this byte
        if (!isset($str[$i + $width - 1])) {
          continue;
        }

        // every following byte must be a continuation byte (10xxxxxx)
        $sequence = $leadByte;
        $isValid = true;
        for ($offset = 1; $offset < $width; $offset++) {
          if (($str[$i + $offset] & "\xC0") !== "\x80") {
            $isValid = false;

            break;
          }

          $sequence .= $str[$i + $offset];
        }

        if ($isValid) {
          $chars[] = $sequence;

          // jump over the continuation bytes
          $i += $width - 1;
        }
      }
    }

    if ($length > 1) {
      return \array_map(
          function ($item) {
            return \implode('', $item);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4747
4748
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // lower-case the first char and remove leading dashes / underscores
    $trimmed = self::lcfirst(self::trim($str), $encoding);
    $withoutLeadingSeparators = (string)\preg_replace('/^[-_]+/', '', $trimmed);

    // remove the separators and upper-case the char after them (if there is one)
    $upperCaseAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };
    $withoutSeparators = (string)\preg_replace_callback(
        '/[-_\s]+(.)?/u',
        $upperCaseAfterSeparator,
        $withoutLeadingSeparators
    );

    // upper-case the char after a run of digits
    $upperCaseAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    return (string)\preg_replace_callback(
        '/[\d]+(.)?/u',
        $upperCaseAfterDigits,
        $withoutSeparators
    );
  }
4785
4786
  /**
   * Capitalizes the words of a name, leaving the parts alone that
   * str_capitalize_name_helper() treats as special cases.
   *
   * Whitespace is collapsed first; the helper is then applied to the
   * space-separated parts and afterwards to the dash-separated parts.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);
    $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
4803
4804
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names on $delimiter and passes every part through self::str_upper_first(),
   * except for parts that
   * - are exactly one of the lowercase particles listed below (e.g. "von", "de"),
   * - start with one of the listed prefixes (e.g. "mc", "d'"), or
   * - start with one of the particles while $delimiter is "-".
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator between the parts; an empty delimiter yields an empty string.</p>
   * @param string $encoding  [optional] <p>Used for the prefix look-ups. Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // explode() cannot split on an empty delimiter (warning + false, or an error on newer PHP versions)
    if ('' === $delimiter) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // the beginnings that protect a part from being capitalized do not depend on the
    // part itself, so they are determined once: the particles only count for "-"
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
    }

    $parts = \explode($delimiter, $names);

    // index-based write-back instead of a by-reference loop variable (no dangling reference)
    foreach ($parts as $key => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $isProtected = false;
      foreach ($protectedBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $isProtected = true;

          // one hit is enough
          break;
        }
      }

      if ($isProtected) {
        continue;
      }

      $parts[$key] = self::str_upper_first($name);
    }

    return \implode($delimiter, $parts);
  }
4892
4893
  /**
   * Checks whether $haystack contains $needle. The search is case-sensitive
   * unless $caseSensitive is false. An empty haystack or an empty needle
   * always results in false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                              A non-boolean value is used as the encoding instead (old call style).</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    // only a fallback to prevent BC in the api: a non-boolean third argument carries the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4922
4923
  /**
   * Checks whether $haystack contains every one of the $needles. Each needle
   * is tested with str_contains(); an empty haystack or an empty needle list
   * results in false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                              A non-boolean value is used as the encoding instead (old call style).</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || \count($needles) === 0) {
      return false;
    }

    // only a fallback to prevent BC in the api: a non-boolean third argument carries the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
4958
4959
  /**
   * Checks whether $haystack contains at least one of the $needles. Each
   * needle is tested with str_contains(); an empty needle list results in false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains one of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // an empty list never enters the loop, so it ends up at the final "false" as well
    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        return true;
      }
    }

    return false;
  }
4986
4987
  /**
   * Dash-separated variant of str_delimit(): forwards the string to
   * str_delimit() with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5001
5002
  /**
   * Returns a trimmed, lower-cased version of the string whose parts are
   * joined by $delimiter.
   *
   * A dash is first inserted before every ASCII uppercase letter (A-Z) that
   * is not located at a word boundary, then the string is lower-cased, and
   * finally each run of dashes, underscores and whitespace is replaced by
   * $delimiter. The delimiter itself is inserted after the lower-casing step.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $dashed = (string)\preg_replace('/\B([A-Z])/u', '-\1', self::trim($str));
    $lowered = self::strtolower($dashed, $encoding);

    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $lowered);
  }
5024
5025
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order and the first hit wins:
   * 1. binary input: UTF-16 / UTF-32 detection, otherwise false
   * 2. ASCII
   * 3. UTF-8
   * 4. "mb_detect_encoding()" in strict mode with a fixed candidate list
   * 5. "iconv()": the first known encoding that converts the input onto itself unchanged
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // every check is evaluated once and its result is reused for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    // always fallback via symfony polyfill
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // the loader may fail; an empty list is cached in that case so the loop below stays valid
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      // the input is accepted as $encodingTmp when converting it onto itself changes nothing
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5146
5147
  /**
   * Checks whether $haystack ends with $needle (case-sensitive, compared
   * byte by byte). An empty haystack or an empty needle results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    // cut as many bytes off the end of the haystack as the needle is long
    $needleBytes = \strlen($needle);
    $tail = \substr($haystack, -$needleBytes);

    return $tail === $needle;
  }
5163
5164
  /**
   * Checks whether the string ends with at least one of the $substrings.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop, so it ends up at the final "false" as well
    foreach ($substrings as $candidate) {
      if (self::str_ends_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5188
5189
  /**
   * Makes sure the string starts with $substring: the substring is
   * prepended whenever str_starts_with() does not report it at the beginning.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring) ? $str : $substring . $str;
  }
5206
5207
  /**
   * Makes sure the string ends with $substring: the substring is appended
   * whenever str_ends_with() does not report it at the end.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring) ? $str : $str . $substring;
  }
5223
5224
  /**
   * Makes an identifier-like string readable: "_id" is replaced by an empty
   * string and "_" by a space (via self::str_replace(), in that order), then
   * the result is trimmed and passed through self::ucfirst().
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // search => replacement, "_id" has to be handled before the single underscore
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $readable = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    return self::ucfirst(self::trim($readable));
  }
5248
5249
  /**
   * Checks whether $haystack ends with $needle, ignoring case. An empty
   * haystack or an empty needle results in false.
   *
   * The end of the haystack is cut off by the byte length of the needle and
   * then compared with self::strcasecmp().
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5269
5270
  /**
   * Checks whether the string ends with at least one of the $substrings.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop, so it ends up at the final "false" as well
    foreach ($substrings as $candidate) {
      if (self::str_iends_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5294
5295
  /**
   * Case-insensitive lookup of the first occurrence of $needle.
   * Thin wrapper that forwards all arguments to self::stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::stripos($str, $needle, $offset, $encoding);
  }
5317
5318
  /**
   * Case-insensitive lookup of the last occurrence of $needle.
   * Thin wrapper that forwards all arguments to self::strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strripos($str, $needle, $offset, $encoding);
  }
5341
5342
  /**
   * Case-sensitive lookup of the first occurrence of $needle.
   * Thin wrapper that forwards all arguments to self::strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strpos($str, $needle, $offset, $encoding);
  }
5364
5365
  /**
   * Case-sensitive lookup of the last occurrence of $needle.
   * Thin wrapper that forwards all arguments to self::strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strrpos($str, $needle, $offset, $encoding);
  }
5388
5389
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * An index greater than the length of the string leaves the string unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    if ($index > $len) {
      return $str;
    }

    // self::substr() can signal a failure with false, so both halves are normalised to strings
    $start = (string)self::substr($str, 0, $index, $encoding);

    // a null length is used for "up to the end" (as in the other self::substr() calls of this class);
    // this avoids handing a possibly-false $len over as the length argument
    $end = (string)self::substr($str, $index, null, $encoding);

    return $start . $substring . $end;
  }
5412
5413
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search value is turned into a quoted, case-insensitive unicode
   * pattern and the replacement is carried out by "preg_replace()". The
   * replacement values are inserted literally: "$" and "\" carry no special
   * meaning (no back-references). An empty search value never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (or values, when an array is given).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // one case-insensitive, UTF-8 aware pattern per search value
    $patterns = \array_map(
        static function ($needle) {
          $needle = (string)$needle;

          // an empty needle must not match anything: this pattern can never succeed
          if ('' === $needle) {
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($needle, '/') . '/ui';
        },
        (array)$search
    );

    // "preg_replace()" would interpret "$n" and "\n" in a replacement as back-references,
    // "str_ireplace()" inserts them as they are, therefore they are escaped here
    $escape = static function ($replacement) {
      return \addcslashes((string)$replacement, '\\$');
    };
    $replacements = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    return \preg_replace($patterns, $replacements, $subject, -1, $count);
  }
5456
5457
  /**
   * Checks whether $haystack starts with $needle, ignoring case. An empty
   * haystack or an empty needle results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    // a match at index 0 means that the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
5477
5478
  /**
   * Checks whether the string begins with at least one of the $substrings.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ('' === $str || \count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_istarts_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5506
5507
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator cannot be found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // the search uses the caller's encoding, so that the offset fits the substr() call below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // skip the separator itself; a null length reads up to the end of the string
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5538
5539
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator cannot be found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // the search uses the caller's encoding, so that the offset fits the substr() call below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // skip the separator itself; a null length reads up to the end of the string
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5570
5571
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator cannot be found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // the search uses the caller's encoding, so that the offset fits the substr() call below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5597
5598
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator cannot be found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // the search uses the caller's encoding, so that the offset fits the substr() call below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5624
5625
  /**
   * Gets the part of the string that self::stristr() returns for the
   * "$needle": the substring after, or via "$beforeNeedle" before, its first occurrence.
   *
   * An empty string is returned when $str or $needle is empty or when
   * self::stristr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5657
5658
  /**
   * Gets the part of the string that self::strrichr() returns for the
   * "$needle": the substring after, or via "$beforeNeedle" before, its last occurrence.
   *
   * An empty string is returned when $str or $needle is empty or when
   * self::strrichr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5685
5686
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end; values <= 0 yield an empty string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // A negative start offset counts from the end of the string.
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5705
5706
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than "$length", it is cut so that the kept part plus
   * "$strAddOn" is "$length" characters long. When "$strAddOn" alone is already
   * longer than "$length", only "$strAddOn" is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The (possibly shortened) string; empty if the input is empty or "$length" <= 0.</p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the string itself, and never let the
    // number of kept characters drop below zero: a negative length handed to substr()
    // would no longer mean "keep nothing" (PHP's native substr functions interpret it
    // as "omit that many characters from the end").
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    // substr() may return false; cast so the concatenation always works on strings.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5732
5733
  /**
   * Limit the number of characters in a string without ending in the middle of a word.
   *
   * The string is cut at "$length" characters and the last (possibly incomplete)
   * word is dropped before "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The (possibly shortened) string; empty if the input is empty or "$length" <= 0.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The character at the limit is a space: cut right in front of it.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length, $encoding);

    // Drop everything after the last space, i.e. the word that was cut.
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // There was no space at all: fall back to a hard cut.
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
5774
5775
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared leading characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only positions that exist in both strings can be part of the prefix.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $position = 0;
    while ($position < $limit) {
      $char = (string)self::substr($str, $position, 1, $encoding);

      // Stop at the first position where the two strings differ.
      if ($char !== (string)self::substr($otherStr, $position, 1, $encoding)) {
        break;
      }

      $prefix .= $char;
      ++$position;
    }

    return $prefix;
  }
5801
5802
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if there is none.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of $otherStr once, instead of once per table cell.
    // Index $j holds the character at offset ($j - 1), matching the 1-based table.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; ++$j) {
      $otherChars[$j] = (string)self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // 1-based end position of that match inside $str

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1]
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; ++$i) {
      // Loop-invariant for the inner loop, so fetch it only once per row.
      $strChar = (string)self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; ++$j) {
        if ($strChar === $otherChars[$j]) {
          $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

          // strictly greater: on ties the first occurrence wins
          if ($table[$i][$j] > $len) {
            $len = $table[$i][$j];
            $end = $i;
          }
        } else {
          $table[$i][$j] = 0;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
5853
5854
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared trailing characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only as many characters as the shorter string has can be shared.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $distance = 1; // distance from the end of both strings
    while ($distance <= $limit) {
      $char = (string)self::substr($str, -$distance, 1, $encoding);

      // Stop at the first position (counted from the end) where the strings differ.
      if ($char !== (string)self::substr($otherStr, -$distance, 1, $encoding)) {
        break;
      }

      // Walking backwards, so every new character goes in front.
      $suffix = $char . $suffix;
      ++$distance;
    }

    return $suffix;
  }
5880
5881
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier,
   * so it must be given without delimiters.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match, 0 on no match and false on error.
    return \preg_match('/' . $pattern . '/u', $str) === 1;
  }
5897
5898
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // Non-negative offsets are 0-based (max: length - 1),
    // negative offsets count from the end (min: -length).
    return $offset >= 0
        ? $length > $offset
        : $length >= \abs($offset);
  }
5921
5922
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Use the same (encoding-aware) bounds check as str_offset_exists(), so the
    // check and the lookup below agree on the string length.
    if (!self::str_offset_exists($str, $index, $encoding)) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
5951
5952
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * An empty input string is returned as an empty string. The string is also
   * returned unchanged when "$pad_length" is not larger than its length or when
   * "$pad_string" is empty (there is nothing to pad with).
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string.
   *
   * @throws \InvalidArgumentException If "$pad_type" is neither an integer nor one of the supported strings.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the string aliases into the native STR_PAD_* constants.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = (int)self::strlen($pad_string, $encoding);

      // Without padding characters nothing can be added, and the divisions
      // below would divide by zero.
      if ($ps_length === 0) {
        return $str;
      }

      // number of characters that have to be added in total
      $diff = ($pad_length - $str_length);

      // Repeat the pad string often enough to cover the gap, then cut it to the exact size.
      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff, $encoding);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // an odd gap puts the extra character on the right side
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff, $encoding);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
6023
6024
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. Delegates to self::apply_padding(); if the missing
   * length is odd, the right side gets the extra character.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6041
6042
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Delegates to self::apply_padding() with padding on the
   * left side only.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding (as str_pad_both() does), so the
    // number of padding characters matches the encoding the padding is applied in.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6057
6058
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Delegates to self::apply_padding() with padding on the right
   * side only.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding (as str_pad_both() does), so the
    // number of padding characters matches the encoding the padding is applied in.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6073
6074
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6098
6099
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for the native "str_replace()", which is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // $count is filled by reference by the native function.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6132
6133
  /**
   * Replaces $search at the beginning of the string with $replacement.
   *
   * Both $search and $replacement are treated as literal text.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // In a PCRE replacement both "\n" and "$n" are back-references, so the backslash
    // AND the dollar sign have to be escaped to insert the replacement literally
    // (otherwise e.g. "$0" or "$10" inside $replacement would be substituted).
    $literalReplacement = \str_replace(['\\', '$'], ['\\\\', '\\$'], $replacement);

    return self::regex_replace(
        $str,
        '^' . \preg_quote($search, '/'), // anchor the literal search term at the start
        $literalReplacement
    );
  }
6150
6151
  /**
   * Replaces $search at the ending of the string with $replacement.
   *
   * Both $search and $replacement are treated as literal text.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // In a PCRE replacement both "\n" and "$n" are back-references, so the backslash
    // AND the dollar sign have to be escaped to insert the replacement literally
    // (otherwise e.g. "$0" or "$10" inside $replacement would be substituted).
    $literalReplacement = \str_replace(['\\', '$'], ['\\\\', '\\$'], $replacement);

    return self::regex_replace(
        $str,
        \preg_quote($search, '/') . '$', // anchor the literal search term at the end
        $literalReplacement
    );
  }
6168
6169
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The term to insert instead.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The changed string, or "$subject" unchanged if "$search" was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    // nothing found, nothing to replace
    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6188
6189
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The term to insert instead.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The changed string, or "$subject" unchanged if "$search" was not found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $position = self::strrpos($subject, $search);

    // nothing found, nothing to replace
    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6208
6209
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // range(0, -1) would produce the two bogus indexes [0, -1] for an empty
    // string, so there is nothing to shuffle in that case.
    if ('' === $str) {
      return '';
    }

    // one index per character, in random order
    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6231
6232
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // The string length is measured in the same encoding that substr() below works
    // with; the (int) cast keeps a failed length lookup from being passed on as "false".
    if ($end === null) {
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // the end lies at or in front of the start: empty slice
      return '';
    } elseif ($end < 0) {
      // a negative end counts from the end of the string
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6261
6262
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become "_", upper-case ASCII letters are lower-cased and
   * prefixed with "_", digits are surrounded by "_"; repeated and leading/trailing
   * "_" are removed afterwards.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // NOTE: no "|" inside the character class - there it would be a literal
        // pipe and not an alternation, so "|" would be handled like a letter.
        '/([\dA-Z])/u',
        static function ($matches) use ($encoding) {
          $match = $matches[1];

          // a digit survives the int round-trip unchanged: isolate it with "_" on both sides
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          // an upper-case letter starts a new word
          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6309
6310
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    // Flipping twice drops duplicate values (array keys are unique).
    if ($unique) {
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6335
6336
  /**
   * Split a string into an array.
   *
   * The string is split with the GRAPHEME_CLUSTER_RX pattern; "$len" of these
   * matches are joined into one array element. An array input is processed
   * element by element.
   *
   * @param string|string[] $str
   * @param int             $len <p>Number of matches per element; values <= 0 yield an empty array.</p>
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    if ($len <= 0) {
      return [];
    }

    // Arrays are handled recursively, element by element.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Join every $len consecutive matches into one chunk.
    $chunks = [];
    $chunkIndex = -1;
    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // first match of a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
6384
6385
  /**
   * Splits the string at every occurrence of "$pattern", returning an array of
   * strings. An optional integer $limit will truncate the results.
   *
   * INFO: "$pattern" is passed through preg_quote(), so it is matched as literal text.
   *
   * @param string $str
   * @param string $pattern <p>The text at which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // an empty pattern cannot split anything
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the remaining unsplit string into the last element when a
    // limit is given, so ask for one element more and drop that remainder afterwards.
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
    if ($parts === false) {
      return [];
    }

    if ($splitLimit > 0 && \count($parts) === $splitLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6428
6429
  /**
   * Check if the string starts with the given substring.
   *
   * - case-sensitive
   * - an empty haystack or an empty needle never matches
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Compare only the leading bytes of the haystack with the needle.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6449
6450
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || \count($substrings) === 0) {
      return false;
    }

    // the first matching candidate decides
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6478
6479
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * INFO: An empty string is returned if $str or $separator is empty, or if
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used below for
    // measuring and slicing, so that every offset counts the same characters.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6510
6511
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * INFO: An empty string is returned if $str or $separator is empty, or if
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used below for
    // measuring and slicing, so that every offset counts the same characters.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6542
6543
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * INFO: An empty string is returned if $str or $separator is empty, or if
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used for slicing,
    // so that the offset and the slice count the same characters.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
6574
6575
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * INFO: An empty string is returned if $str or $separator is empty, or if
   * the separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the same encoding that is used for slicing,
    // so that the offset and the slice count the same characters.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
6606
6607
  /**
   * Gets the part of the string that UTF8::strstr() yields for the first
   * occurrence of the "$needle" (the part before it via "$beforeNeedle").
   *
   * INFO: Unlike UTF8::strstr() this method never returns false: an empty
   * input, an empty needle or a missing needle results in an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $result = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // map the "not found" marker of strstr() to an empty string
    return false === $result ? '' : $result;
  }
6639
6640
  /**
   * Gets the part of the string that UTF8::strrchr() yields for the last
   * occurrence of the "$needle" (the part before it via "$beforeNeedle").
   *
   * INFO: Unlike UTF8::strrchr() this method never returns false: an empty
   * input, an empty needle or a missing needle results in an empty string.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $result = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // map the "not found" marker of strrchr() to an empty string
    return false === $result ? '' : $result;
  }
6667
6668
  /**
   * Wraps $str into the given substring.
   *
   * @param string $str       <p>The string to wrap.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6680
6681
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * INFO: A "word" is every run of non-whitespace characters. Words that are
   * listed in $ignore (exact, case-sensitive match) are kept untouched, all
   * other words are lower-cased before their first letter is upper-cased.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // use the requested encoding for both steps, not only for the lower-casing
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6710
6711
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * INFO: The entries of $ignore are appended to the built-in list of small
   * words and are inserted into the regular expressions as they are (they
   * are not quoted), so they are interpreted as regex fragments.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // small words (as regex fragments) that stay lower-case inside of a title
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // A string without any lower-case character (e.g. "ALL CAPS") is lower-cased
    // first, otherwise every word would be treated as a word with internal caps.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $word = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $word .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $word .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $word .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $word .= $matches[5];
          }
          // Preserve trailing underscore
          $word .= $matches[6];

          return $word;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    //
    // NOTE(review): the lookbehind in the pattern below tests for the ellipsis
    // character ("…") although its inline comment talks about a hyphen; the
    // pattern is kept unchanged here - TODO confirm the intended character.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6855
6856
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number which is
   * written with the digits "0" and "1", without leading zeros.
   *
   * INFO: The conversion is done byte by byte, because "base_convert()" loses
   * precision for large numbers and would return wrong digits for longer
   * strings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *                The binary digits, or "0" if the string is empty or
   *                contains only NUL bytes.
   */
  public static function str_to_binary(string $str): string
  {
    if ('' === $str) {
      return '0';
    }

    $bits = '';
    foreach ((array)\unpack('C*', $str) as $byte) {
      // every byte contributes exactly eight digits
      $bits .= \sprintf('%08b', $byte);
    }

    // a number is written without leading zeros, but never as an empty string
    $bits = \ltrim($bits, '0');

    return '' === $bits ? '0' : $bits;
  }
6869
6870
  /**
   * Splits a string into its lines.
   *
   * INFO: A run of one or two "\r" / "\n" characters is handled as a single
   * line break, so "\r\n" is one separator - and so is "\n\n".
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // the result for an empty input and for a failed split
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    // without any filter option the raw split result is the answer
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
6905
6906
  /**
   * Convert a string into an array of words.
   *
   * INFO: Without any filter option the result alternates between the text
   * around the words (even indexes, maybe empty) and the words themselves
   * (odd indexes), because the words are captured as delimiters of the split.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // the result for an empty input and for a failed split
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // without any filter option the raw split result is the answer
    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $parts;
    }

    $filtered = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    // make sure that every remaining value is a string
    return \array_map(
        static function ($part) {
          return (string)$part;
        },
        $filtered
    );
  }
6950
6951
  /**
   * alias for "UTF8::to_ascii()"
   *
   * INFO: All arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6966
6967
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * INFO: If the substring is longer than the desired length it can not fit,
   * so the string is only cut to $length and the substring is left out.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    $substringLength = (int)self::strlen($substring, $encoding);

    // Without this guard the remaining length would become negative, which
    // "substr()" interprets as "cut from the end" - the result could then even
    // be longer than the input string.
    if ($length >= 0 && $substringLength > $length) {
      return (string)self::substr($str, 0, $length, $encoding);
    }

    // Need to further trim the string so we can append the substring
    // (the cast keeps a failed "substr()" from being concatenated as false)
    $truncated = (string)self::substr($str, 0, $length - $substringLength, $encoding);

    return $truncated . $substring;
  }
7000
7001
  /**
   * Truncates the string to a given length without splitting a word. If
   * $substring is provided, and truncating occurs, the string is further
   * truncated so that the substring may be appended without exceeding the
   * desired length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // reserve the room that the appended substring is going to need
    $length -= self::strlen($substring, $encoding);

    $cut = self::substr($str, 0, $length, $encoding);
    if ($cut === false) {
      return '';
    }

    // a space directly at the cut position means that no word was split
    // (the comparison is intentionally loose, as a missing space is reported as false)
    $spacePos = self::strpos($str, ' ', $length - 1, $encoding);
    if ($spacePos != $length) {
      // the last word was truncated: go back to the last space before it
      $lastSpacePos = self::strrpos($cut, ' ', 0, $encoding);

      if ($lastSpacePos !== false || $spacePos !== false) {
        $cut = (string)self::substr($cut, 0, (int)$lastSpacePos, $encoding);
      }
    }

    return $cut . $substring;
  }
7044
7045
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * INFO: This is "UTF8::str_delimit()" with "_" as the delimiter.
   *
   * @see UTF8::str_delimit()
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $underscored = self::str_delimit($str, '_');

    return $underscored;
  }
7059
7060
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * INFO: The string is camelized via "UTF8::str_camelize()" first, then its
   * first character is upper-cased via "UTF8::str_upper_first()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7074
7075
  /**
   * alias for "UTF8::ucfirst()"
   *
   * INFO: All arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8);

    return $upperFirst;
  }
7090
7091
  /**
   * Counts number of words in the UTF-8 string.
   *
   * INFO: The words are taken from the result of "UTF8::str_to_words()", in
   * which every second entry (odd index) is a word.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // plain list of the words
      $words = [];
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      // words, keyed by the offset at which they start
      $words = [];
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        // skip the word itself and the text that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // every other format: only the number of words
    return (int)(($partCount - 1) / 2);
  }
7134
7135
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
   * case-folded via "UTF8::strtocasefold()" before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
7152
7153
  /**
   * alias for "UTF8::strstr()"
   *
   * INFO: All arguments are passed through unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
7170
7171
  /**
   * Case-sensitive string comparison.
   *
   * INFO: Both strings are normalized (NFD) before they are compared, so
   * that different representations of the same character are equal. If a
   * string can not be normalized (e.g. invalid UTF-8), the raw bytes of both
   * strings are compared instead.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    // identical bytes are always equal, no normalization needed
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "Normalizer::normalize()" reports a failure with false, which must not
    // be handed over to "\strcmp()" (a TypeError in strict mode).
    if (!\is_string($normalized1) || !\is_string($normalized2)) {
      return \strcmp($str1, $str2);
    }

    return \strcmp($normalized1, $normalized2);
  }
7190
7191
  /**
   * Find length of initial segment not matching mask.
   *
   * INFO: If $offset and / or $length are given, only that part of the
   * string (see "UTF8::substr()") is examined.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   *                  The length of the initial segment, or <strong>null</strong> if $charList
   *                  is empty, if the requested part of the string can not be extracted or
   *                  if the (remaining) string is empty.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // lazily capture everything up to the first match of the char-class
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return (int)self::strlen($matches[1]);
    }

    // no match at all: the whole string is the segment
    return (int)self::strlen($str);
  }
7225
7226
  /**
   * alias for "UTF8::stristr()"
   *
   * INFO: All arguments are passed through unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
7243
7244
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry of the array is converted via "UTF8::chr()", the results are
   * joined in the order of the array.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
7266
7267
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * INFO: The string is tested against every BOM that is listed in the
   * internal BOM-table of this class.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the BOM itself (the key) is needed here, not its byte-length
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7286
7287
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string (before the tags are stripped).</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // optionally remove the invalid bytes first, then let PHP strip the tags
    $input = ($cleanUtf8 === true ? self::clean($str) : $str);

    return \strip_tags($input, $allowable_tags);
  }
7319
7320
  /**
   * Remove every run of whitespace from a string.
   *
   * Matching uses the POSIX class "[[:space:]]" together with the "u" modifier,
   * which is meant to cover tabs and newlines as well as multibyte whitespace
   * such as the thin space and the ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input without whitespace. If preg_replace() fails and yields null,
   *                the cast turns that into an empty string.
   */
  public static function strip_whitespace(string $str): string
  {
    return '' === $str
        ? ''
        : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
7337
7338
  /**
   * Case-insensitive search for the first occurrence of a string within another.
   *
   * Several backends are tried in order: mbstring, intl ("grapheme_stripos"), the native
   * stripos() for pure ASCII input, the symfony polyfill and finally an upper-cased lookup
   * via UTF8::strpos(). A "not found" from mbstring or intl does not end the search; the
   * next backend is tried.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found or if one
   *                   of the two strings is empty.
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the two most common encodings skip the normalization step
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // "grapheme_stripos()" can't handle other encodings or a negative offset
    $graphemeUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($graphemeUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php: upper-case both sides, then search case-sensitively
    return self::strpos(
        self::strtoupper($haystack, $encoding, false, null, true),
        self::strtoupper($needle, $encoding, false, null, true),
        $offset,
        $encoding
    );
  }
7411
7412
  /**
   * Returns all of haystack starting from and including the first case-insensitive
   * occurrence of needle to the end.
   *
   * Backends are tried in order: mbstring, intl ("grapheme_stristr"), the native stristr()
   * for pure ASCII input and finally a regular expression.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      or if haystack / needle is an empty string.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. The comparison must be strict:
    // a loose "!$needle" is also true for the valid needle "0".
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: group 1 lazily captures everything in front of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // UTF8::strlen() is documented to return false in some cases; never pass that
    // on as an offset, report "not found" instead.
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
7490
7491
  /**
   * Get the string length in characters, not the byte-length!
   *
   * For "ASCII" and "CP850" the byte length is returned directly. Otherwise the backends
   * are tried in order: iconv (only for non UTF-8 encodings when mbstring is missing),
   * mbstring, iconv, intl ("grapheme_strlen"), the symfony polyfill, the native strlen()
   * for pure ASCII input and finally a regular expression.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $str) {
      return 0;
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // for these two encodings the byte count is used as the length
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        // neither extension is available: warn, then continue with the remaining fallbacks
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $characters);
    $length = \count($characters[0]);

    // zero matches for a non-empty string is reported as a failure
    return ($length === 0 && isset($str[0])) ? false : $length;
  }
7601
7602
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int The number of bytes; 0 for an empty string.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ($str === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
7626
7627
  /**
   * Count the number of substring occurrences, working on bytes.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string being found.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. It outputs a warning if the offset plus the length is
   *                         greater than the haystack length.
   *                         </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string (0 if haystack or needle is empty).
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_substr_count()" is called without offset / length here,
      // so the requested window is cut out of the haystack first.
      if ($offset || $length !== null) {
        if ($length === null) {
          // The window is taken with a byte-based helper, so the default length
          // has to be the byte length, not the character length.
          $length = self::strlen_in_byte($haystack);
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
        if ($haystackTmp === false) {
          $haystackTmp = '';
        }
        $haystack = (string)$haystackTmp;
      }

      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // Do not hand a null length to the native function: before PHP 8 the fourth
    // parameter is a plain int and null is rejected in strict mode.
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
7701
7702
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are passed through
   *       UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
7719
7720
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7739
7740
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
7758
7759
  /**
   * String comparison of the first n characters.
   *
   * Both strings are shortened with UTF8::substr() and the results are
   * compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the cast turns a failed substr() (false) into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7780
7781
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      or if one of the two arguments is an empty string.
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ('' === $char_list || '' === $haystack) {
      return false;
    }

    // the pattern is built from the character list by UTF8::rxClass()
    $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$found) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $start = (int)\strpos($haystack, $m[0]);

    return \substr($haystack, $start);
  }
7803
7804
  /**
   * Find position of first occurrence of string in a string.
   *
   * Backends are tried in order: native strpos() for "CP850", iconv (only for non UTF-8
   * encodings when mbstring is missing), mbstring, intl ("grapheme_strpos"), iconv, native
   * strpos() for pure ASCII input, the symfony polyfill and finally a plain PHP fallback.
   * A "not found" from mbstring, intl or iconv does not end the search; the next backend is tried.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // Turn an offset counted from the end into the matching offset from the start
      // (never below 0), so the position returned below refers to the whole haystack
      // and not only to the tail that is searched.
      $offset = \max(0, (int)self::strlen($haystack, $encoding) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle inside the remaining part of the haystack
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos === 0) {
      return $offset;
    }

    // convert the byte position into a character position and add the skipped characters
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
  }
7955
7956
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * With mbstring (or the symfony polyfill) the work is delegated to mb_strrchr(). The
   * native strrchr() is used for "CP850" / "ASCII" when $before_needle is false. The last
   * fallback searches for the first character of $needle only, via UTF8::strrpos().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // single-byte encodings can use the native function, which has no "before needle" mode
    $isSingleByteEncoding = 'CP850' === $encoding || 'ASCII' === $encoding;
    if ($before_needle === false && $isSingleByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // fallback via vanilla php: only the first character of the needle is searched

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $pos = self::strrpos($haystack, $needle, null, $encoding);
    if ($pos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
  }
8037
8038
  /**
   * Reverses characters order in the string.
   *
   * The string is rebuilt one character at a time with UTF8::substr(), walking from the
   * last position to the first.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // UTF8::strlen() may return false; the cast turns that into 0, so the loop is skipped
    $length = (int)self::strlen($str);

    $reversed = '';
    for ($index = $length - 1; $index >= 0; --$index) {
      $reversed .= self::substr($str, $index, 1);
    }

    return $reversed;
  }
8059
8060
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_strrichr()" is called unconditionally (no support check):
    // always fallback via symfony polyfill
    return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
  }
8096
8097
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Backends are tried in order: mbstring, intl ("grapheme_strripos"), the symfony polyfill,
   * the native strripos() for pure ASCII input and finally an upper-cased lookup via
   * UTF8::strrpos().
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for. A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // "grapheme_strripos()" can't handle other encodings or a negative offset
    $graphemeUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($graphemeUsable) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // fallback for ascii only
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides, then search case-sensitively
    return self::strrpos(
        self::strtoupper($haystack, $encoding, false, null, true),
        self::strtoupper($needle, $encoding, false, null, true),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8183
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.<br>
   *                              <b>null</b> (the default) is handed to the "mb_" functions as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // mb_strrpos() takes a non-nullable int offset: with strict_types a null is a TypeError on PHP 8,
      // so the "no offset" default is passed as 0 (which is what older versions coerced it to anyway)
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      // same null-to-0 normalisation as for the native call above
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    // fallback for ascii only
    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return \strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // apply the offset by cutting the haystack (character-wise), then search byte-wise in the remainder
    $haystackTmp = null;
    if ($offset !== null && $offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset !== null && $offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and add the characters skipped by the offset;
    // the explicit casts spell out the null / false => 0 coercion the arithmetic relied on before
    return (int)$offset + (int)self::strlen(\substr($haystack, 0, $pos));
  }
8298
8299
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start of the examined part, passed on to UTF8::substr().</p>
   * @param int|null $length [optional] <p>Length of the examined part, passed on to UTF8::substr().</p>
   *
   * @return int
   *             Length of the leading run of mask characters, 0 if there is none
   *             (also 0 for an empty string / mask or when the pattern match fails).
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated $matches array instead of overwriting the (string) input with the match result
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      // 0 === no match, false === pcre error (e.g. invalid UTF-8)
      return 0;
    }

    // UTF8::strlen() may report false, the declared return type needs an int
    return (int)self::strlen($matches[0]);
  }
8326
8327
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found
   *                       (or haystack / needle is empty).
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences first, so that the search below runs on clean input
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via regex: lazily capture everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // UTF8::strlen() may report false, which must not be handed to the int $offset of UTF8::substr()
    $needleStart = self::strlen($match[1]);
    if ($needleStart === false) {
      return false;
    }

    return self::substr($haystack, $needleStart);
  }
8398
8399
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the common case-fold table, optionally the full case-fold table, and lower-cases the result.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    // lookup tables are built once per request and kept between calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ('' === $str) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $folded = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
8445
8446
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr<br>
   *                                           Needs "intl", otherwise a warning is triggered and the parameter is
   *                                           ignored.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;
    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before converting the case
      $str = self::clean($str);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    // without a language there is nothing special to do
    if ($lang === null) {
      // always fallback via symfony polyfill
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      // always fallback via symfony polyfill
      return \mb_strtolower($str, $encoding);
    }

    // language specific transliterator, e.g. "tr-Lower"
    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the generic transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
8508
8509
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * Decomposes the string (NFD) and removes every run of non-spacing marks (\p{Mn}).
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
8521
8522
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr<br>
   *                                           Needs "intl", otherwise a warning is triggered and the parameter is
   *                                           ignored.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before converting the case
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // language specific transliterator, e.g. "tr-Upper"
        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, it just does not know this language
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator instead
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8584
8585
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.<br>
   *                              If omitted, a string $from is removed from $str and any other $from is used as
   *                              replace-pairs for the native strtr().</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // the longer list is cut down, so that both sides have the same number of characters
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
8629
8630
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // iconv and mbstring are not tolerant to invalid encoding
    // further, their behaviour is inconsistent with that of PHP's substr
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    // always fallback via symfony polyfill
    return \mb_strwidth($input, $encoding);
  }
8654
8655
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string   $str       <p>The string being checked.</p>
   * @param int      $offset    <p>The first position used in str.</p>
   * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty string (nothing to cut from / nothing requested)
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the character count is only needed for the offset checks below
    // (a null $length implies a non-zero $offset at this point)
    $charCount = 0;
    if ($offset !== 0) {
      $charCount = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($charCount === false) {
      return false;
    }

    // Empty string (offset points exactly behind the last character)
    if ($length === null && $offset === $charCount) {
      return '';
    }

    // Impossible
    if ($offset > $charCount) {
      return false;
    }

    $length = (int)($length ?? $charCount);

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut with the native byte function, as long as substr() is not overloaded by mbstring
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return \substr($str, $offset, $length);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // "grapheme_substr()" can't handle other encodings nor a negative offset
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  $offset >= 0
                  &&
                  self::$SUPPORT['intl'] === true;
    if ($canUseIntl) {
      $slice = \grapheme_substr($str, $offset, $length);
      if ($slice !== false) {
        return $slice;
      }
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      $slice = \iconv_substr($str, $offset, $length);
      if ($slice !== false) {
        return $slice;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($chars, $offset, $length));
  }
8789
8790
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $needsSlicing = $offset !== 0 || $length !== null;

    if ($needsSlicing) {
      // a failed substr() (false) is treated as an empty string by the cast
      $str1 = (string)self::substr($str1, $offset, $length);

      // the secondary string is shortened to the length of the extracted main part
      $str2 = (string)self::substr($str2, 0, self::strlen($str1));
    }

    return $case_insensitivity === true
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
8833
8834
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. It outputs a warning if the offset plus the length is
   *                             greater than the haystack length.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   This functions returns an integer or false if there isn't a string
   *                   (empty haystack / needle, also when the needle is empty after cleaning).
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure with the same encoding that is used for the substr() call below
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences first, so that the counting below runs on clean input
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // the needle may consist of invalid bytes only: an empty needle must neither reach
      // mb_substr_count() nor the regex fallback (an empty pattern matches at every position)
      if ('' === $needle) {
        return false;
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: one match-set per (non-overlapping) occurrence
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
8927
8928
  /**
   * Counts how often $substring occurs in $str.
   *
   * The comparison is case-sensitive unless $caseSensitive is set to false; in that case
   * both strings are upper-cased before they are handed to "UTF8::substr_count()".
   *
   * @param string      $str           <p>The input string.</p>
   * @param string      $substring     <p>The substring to search for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is still accepted for backward compatibility and is
   *                                   then used as the encoding.</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, 0 if one of the strings is empty.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $substring || '' === $str) {
      return 0;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is interpreted as the encoding)
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    $ignoreCase = ((bool)$caseSensitive === false);
    if ($ignoreCase) {
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    $count = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$count;
  }
8958
8959
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * The haystack is returned unchanged if it does not start with the needle, if the needle
   * is empty, or if the length of the needle cannot be determined.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string The haystack without the prefix.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "UTF8::strlen()" can return false; passing that on as the (int) offset of
    // "UTF8::substr()" would fail under strict types, so keep the input instead
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $remainder = self::substr($haystack, (int)$needleLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
8987
8988
  /**
   * Get part of a string, with offset and length counted in bytes.
   *
   * @param string $str    <p>The string being checked.</p>
   * @param int    $offset <p>The first position used in str.</p>
   * @param int    $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      bytes long, <b>FALSE</b> will be returned.</p>
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // empty input, or explicitly zero bytes requested
    if ($str === '' || $length === 0) {
      return '';
    }

    // neither offset nor length given: the whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the byte length is only looked up when it is needed for the checks below
    $byteLength = ($offset !== 0 || $length === null) ? self::strlen_in_byte($str) : 0;

    // e.g.: invalid chars + mbstring not installed
    if ($byteLength === false) {
      return false;
    }

    // start exactly at the end and read "the rest": nothing left
    // ($length can not be 0 here, so "no length" means null)
    if ($length === null && $offset === $byteLength) {
      return '';
    }

    // start behind the end of the string: impossible
    if ($offset !== 0 && $offset > $byteLength) {
      return false;
    }

    if ($length === null) {
      $length = $byteLength;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }

    return \substr($str, $offset, $length);
  }
9053
9054
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to search for in the haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false the numeric position (in bytes) of
   *                   the first occurrence of needle in the
   *                   haystack string. If needle is not found or
   *                   one of the strings is empty, it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isOverloaded = (self::$SUPPORT['mbstring_func_overload'] === true);

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return $isOverloaded
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
9089
9090
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * The haystack is returned unchanged if it does not end with the needle, if the needle
   * is empty, or if the length of one of the strings cannot be determined.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string The haystack without the suffix.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "UTF8::strlen()" can return false; "false - false" would silently become a
    // length of 0 and wipe the whole string, so keep the input in that case
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $remainder = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
9118
9119
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * The haystack is returned unchanged if it does not start with the needle, if the needle
   * is empty, or if the length of the needle cannot be determined.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string The haystack without the prefix.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "UTF8::strlen()" can return false; passing that on as the (int) offset of
    // "UTF8::substr()" would fail under strict types, so keep the input instead
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $remainder = self::substr($haystack, (int)$needleLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
9147
9148
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given and $str is a string, it defaults to
   *                                          the length of the string; i.e. the replacing ends at the end of string.
   *                                          If it is not given and $str is an array, a length of 0 is used for
   *                                          every element. If length is zero then this function has the effect of
   *                                          inserting replacement into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: one entry per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: anything that is not an integer falls back to 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: null becomes 0, other non-integers fall back to the number of strings
      if (null === $length) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if (null === $value) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, element by element
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string only takes the first replacement
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // pure ASCII input can be handled by the native byte-based function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters and splice the arrays
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $strLength = self::strlen($str);
      if ($strLength === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$strLength;
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
9254
9255
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * The haystack is returned unchanged if it does not end with the needle, if the needle
   * is empty, or if the length of one of the strings cannot be determined.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string The haystack without the suffix.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "UTF8::strlen()" can return false; "false - false" would silently become a
    // length of 0 and wipe the whole string, so keep the input in that case
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $remainder = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
9283
9284
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before any case mapping is done
      $str = self::clean($str);
    }

    $lower = (string)self::strtolower($str, $encoding);
    $upper = (string)self::strtoupper($str, $encoding);

    // Fast path: lower ^ upper ^ original flips the case byte by byte. This is only
    // safe while the case mapping keeps the byte length, otherwise the three strings
    // are no longer aligned and the XOR would produce garbage.
    $byteLength = self::strlen_in_byte($str);
    if (
        self::strlen_in_byte($lower) === $byteLength
        &&
        self::strlen_in_byte($upper) === $byteLength
    ) {
      return (string)($lower ^ $upper ^ $str);
    }

    // Slow path: a mapping changed the byte length, so swap every character on its own.
    $chars = \preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
      // not splittable as UTF-8, keep the previous byte-wise behavior
      return (string)($lower ^ $upper ^ $str);
    }

    $swapped = '';
    foreach ($chars as $char) {
      $charLower = (string)self::strtolower($char, $encoding);
      // already lower-case (or caseless): use the upper-case form, otherwise the lower-case form
      $swapped .= ($char === $charLower) ? (string)self::strtoupper($char, $encoding) : $charLower;
    }

    return $swapped;
  }
9311
9312
  /**
   * Checks whether a userland polyfill stands in for "mbstring" or "iconv".
   *
   * A polyfill is assumed when the extension itself is not loaded, but one of its
   * functions ("mb_strlen" respectively "iconv") exists anyway.
   *
   * @return bool
   *              <strong>true</strong> if at least one of the two extensions is polyfilled,
   *              <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringIsPolyfilled || $iconvIsPolyfilled;
  }
9335
9336
  /**
   * Replaces every tab character with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string The string with all tabs replaced.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9346
9347
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase (via "mb_convert_case()" with MB_CASE_TITLE).
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');

    // every other encoding name is normalized first, with UTF-8 as the fallback
    if ($isDefaultEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    return \mb_convert_case($str, MB_CASE_TITLE, $encoding);
  }
9365
9366
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Forwarded as the "unknown" character of "UTF8::to_ascii()".</p>
   * @param bool   $strict    [optional] <p>Forwarded as the "strict" flag of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9383
9384
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_iso8859()" for the given input.
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9399
9400
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_latin1()" for the given input.
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9415
9416
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_utf8()" for the given input.
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9431
9432
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; every remaining non-ASCII character is then decoded to its
   * code point and looked up in the transliteration tables loaded via "getData()". Characters
   * without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // per-request cache of the transliteration tables, indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // the transliterator returns false on failure: keep the cleaned input in that case
        if ($transliterated !== false) {
          $str = (string)$transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (\preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar) === false) {
      // PCRE could not process the input (e.g. it is still not valid UTF-8):
      // degrade gracefully and substitute every run of non-ASCII bytes
      return (string)\preg_replace_callback(
          '/[\x80-\xFF]+/',
          static function () use ($unknown) {
            return $unknown;
          },
          $str
      );
    }

    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // 0x80-0xBF can not start a sequence, 0xFE / 0xFF never occur in UTF-8
      // and a lead byte without a second byte can not be decoded
      if ($ordC0 < 192 || $ordC0 >= 254 || !isset($c[1])) {
        $c = $unknown;
        continue;
      }

      // decode the code point; reset for every character so that a sequence
      // that can not be decoded never inherits the value of its predecessor
      $ord = null;
      $ordC1 = self::$ORD[$c[1]];

      if ($ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      } elseif ($ordC0 <= 239) {
        // 3 bytes
        if (isset($c[2])) {
          $ordC2 = self::$ORD[$c[2]];
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }
      } elseif ($ordC0 <= 247) {
        // 4 bytes
        if (isset($c[3])) {
          $ordC2 = self::$ORD[$c[2]];
          $ordC3 = self::$ORD[$c[3]];
          $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
        }
      } elseif ($ordC0 <= 251) {
        // 5 bytes (legacy form)
        if (isset($c[4])) {
          $ordC2 = self::$ORD[$c[2]];
          $ordC3 = self::$ORD[$c[3]];
          $ordC4 = self::$ORD[$c[4]];
          $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
        }
      } else {
        // 6 bytes (legacy form), lead byte 252 or 253
        if (isset($c[5])) {
          $ordC2 = self::$ORD[$c[2]];
          $ordC3 = self::$ORD[$c[3]];
          $ordC4 = self::$ORD[$c[4]];
          $ordC5 = self::$ORD[$c[5]];
          $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
        }
      }

      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into "banks" of 256 code points, loaded on demand
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      // use the table entry, or $unknown when the character is not covered
      $c = isset($UTF8_TO_ASCII[$bank][$newchar]) ? $UTF8_TO_ASCII[$bank][$newchar] : $unknown;
    }
    // break the reference left behind by the loop
    unset($c);

    return \implode('', $chars);
  }
9593
9594
  /**
   * Converts a value into a boolean, based on its string representation.
   *
   * "true", "1", "on" and "yes" (case-insensitive) are true; "false", "0", "off" and "no"
   * are false. Any other numeric string is true if it is greater than zero. Everything else
   * is true if something is left after trimming it.
   *
   * @param mixed $str <p>The value to convert, it is cast to a string first.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $key = \strtolower($str);

    if (\in_array($key, ['true', '1', 'on', 'yes'], true) === true) {
      return true;
    }

    if (\in_array($key, ['false', '0', 'off', 'no'], true) === true) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($str)) {
      return (float)$str > 0;
    }

    return (bool)self::trim($str);
  }
9633
9634
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keys are kept.
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The converted string, or the array of converted strings.
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $input = (string)$str;

    return ('' === $input) ? '' : self::utf8_decode($input);
  }
9658
9659
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_iso8859()" for the given input.
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9672
9673
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, keys are kept
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // a stray continuation byte (0x80-0xBF) needs conversion, everything below is copied
        $out .= (($lead & "\xC0") === "\x80") ? self::to_utf8_convert_helper($lead) : $lead;
        continue;
      }

      // number of continuation bytes the lead byte announces (0 = no valid UTF-8 lead byte)
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; a missing byte counts as "\x00" and therefore fails
      $tail = '';
      for ($k = 1; $k <= $tailLength; $k++) {
        $byte = ($pos + $k >= $byteCount) ? "\x00" : $str[$pos + $k];
        if ($byte < "\x80" || $byte > "\xBF") {
          $tail = null;
          break;
        }
        $tail .= $byte;
      }

      if ($tailLength === 0 || $tail === null) {
        // not valid UTF8 - convert the lead byte only, the next bytes are examined on their own
        $out .= self::to_utf8_convert_helper($lead);
      } else {
        // yeah, almost sure it's UTF8 already
        $out .= $lead . $tail;
        $pos += $tailLength;
      }
    }

    // decode unicode escape sequences
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
9782
9783
  /**
   * Convert a single byte, which is not valid UTF-8 on its own, into a UTF-8 sequence.
   *
   * The byte is first looked up (via its ordinal) in the Windows-1252 special-case
   * table; if it is not listed there, it is encoded as a two-byte UTF-8 sequence.
   *
   * @param int|string $input <p>The byte to convert; it is used as key of the "ord" lookup table.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Build the two-byte sequence: the lead byte carries the upper bits, the
    // trail byte the lower six bits. An integer shift is used instead of
    // "$ordC1 / 64", because the division yields a float (e.g. 2.5) and a float
    // array key is only truncated implicitly (deprecated since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
9816
9817
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original function if the string / chars are <= 7-Bit,
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped; when omitted (or empty),
   *                      separators (\pZ) and "other" characters (\pC) are stripped.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a perfectly valid character to strip,
    // so it must not be mistaken for "no chars given".
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // the chars are placed inside a character class, so they have to be quoted
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
9846
9847
  /**
   * Uppercase the first character of a string and leave the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>The charset that is passed on to the substring / uppercase helpers.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The string with an uppercased first character.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // optionally run the input through "clean()" before it is split
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    // everything after the first character ("substr()" may report false)
    $tail = self::substr($input, 1, null, $encoding);

    // the first character, uppercased
    $head = self::strtoupper((string)self::substr($input, 0, 1, $encoding), $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
9877
9878
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>The charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
9893
9894
  /**
   * Uppercase the first character of all words in the string.
   *
   * INFO: "mb_convert_case($str, MB_CASE_TITLE)" is not used here, because
   * MB_CASE_TITLE also lowercases all other letters.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are kept as they are.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: the string "0" is falsy, but not empty
    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // The native function can only be used if neither a charlist nor exceptions
    // are given (a "0" charlist / exception counts as given) and the input is ASCII.
    $usePhpDefaultFunctions = '' === $charlist . \implode('', $exceptions);

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty tokens only; a "0" token must be kept in the output
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
9962
9963
  /**
   * Decode url-encoded strings and html entities (repeatedly, if requested)
   * and fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Repeat the decoding until the string does not change anymore.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      // one decoding pass: to UTF-8 -> html entities -> url-decoding -> simple UTF-8 fixes
      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
10011
10012
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-encoded bytes "%20" - "%FF" (upper-case hex digits),
   * the values are the matching characters as UTF-8.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        // NOTE(review): "%7F" (DEL) and the bytes that are unassigned in
        // Windows-1252 (81, 8D, 8F, 90, 9D) are kept as empty strings, as in
        // the original table - confirm that dropping them is intended.
        '%7F' => '',
        '%80' => '€', // 0x80 is the euro sign in Windows-1252 (was a duplicate of "%60")
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // no-break space, written as escape because it is invisible
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // soft hyphen, written as escape because it is invisible
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
10248
10249
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Characters that cannot be represented in a single byte (code points >= 256,
   * 3-byte and 4-byte sequences) are replaced by "?"; the same applies to a
   * 2-byte sequence that is cut off at the end of the input.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars [optional] <p>Return the (pre-processed) input instead of the decoded string,
   *                              if decoding did not reduce the character count.</p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      // NOTE(review): "to_utf8_convert_helper()" indexes this table by an "ord"
      // value, while the keys are used as search strings here - confirm that the
      // key type of the data file fits both usages.
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // The string is rewritten in place: $i is the read position, $j the write
    // position ($j never gets ahead of $i, so no unread byte is overwritten).
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if (!isset($str[$i + 1])) {
            // the sequence is cut off at the end of the input: do not read
            // past the end of the string
            $str[$j] = $noCharFound;
            break;
          }

          $leadByte = $str[$i];
          $trailByte = $str[++$i];
          $c = (self::$ORD[$leadByte & "\x1F"] << 6) | self::$ORD[$trailByte & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one byte more than for a
          // 3-byte sequence, then fall through (intended)
          ++$i;
        case "\xE0":
          // lead byte of a 3-byte sequence: not representable in one byte
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // every other byte is copied as it is
          $str[$j] = $str[$i];
      }
    }

    // cut off the bytes behind the write position
    $return = self::substr_in_byte($str, 0, $j);
    if ($return === false) {
      $return = '';
    }

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
10333
10334
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native encoding step, the entries of the "win1252_to_utf8" table
   * are applied via "str_replace()", if the result contains a "\xC2" byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($encoded === false) {
      return '';
    }

    // without a "\xC2" byte the replacement step is skipped
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists, built once per request
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
10374
10375
  /**
   * Backward-compatibility alias that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10388
10389
  /**
   * Get the table of UTF-8 whitespace characters kept in self::$WHITESPACE_TABLE.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return string[]
   *                 The whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10404
10405
  /**
   * Cut a string down to a maximum number of words.
   *
   * An empty string is returned for empty input or a limit below 1. If the pattern
   * does not match, or the match already covers the whole input, the input is
   * returned unchanged. Otherwise the matched prefix is right-trimmed and
   * $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$limit runs of "word + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $found);

    $head = $found[0] ?? null;
    if ($head === null) {
      return $str;
    }

    // nothing was cut off, so no add-on is needed
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
10436
10437
  /**
   * Wraps a string to a given number of characters
   *
   * The work is done on a single-byte "mask" of the input (one byte per character),
   * which is handed to PHP's native "wordwrap()"; the break positions found in the
   * mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column,
   *                or an empty string if $str or $break is empty.
   *                </p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    // $chars holds the real characters (an existing $break counts as one entry).
    // $mask holds exactly one ASCII byte per entry of $chars:
    // '#' for an existing break, ' ' for a space and '?' for anything else.
    $chars = [];
    $mask = '';

    // $break is non-empty here, so explode() always yields an array.
    foreach (\explode($break, $str) as $segmentIndex => $segment) {
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // Let the native function decide where to break, using '#' as the marker.
    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    // The mask is pure ASCII, so the byte-based strpos() returns character positions.
    while (false !== $breakPos = \strpos($mask, '#', $breakPos + 1)) {
      // copy every character that sits in front of the current break marker
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // A marker that replaced a space or stands for an existing break consumes that
      // entry; a marker inserted by $cut in the middle of a word consumes nothing.
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever follows the last break marker is appended unchanged
    return $wrapped . \implode('', $chars);
  }
10507
10508
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * The input is split on "\r\n", "\r" and "\n"; each resulting line is wrapped
   * with the multi-byte aware "UTF8::wordwrap()" and terminated with "\n".
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width passed to "UTF8::wordwrap()".</p>
   *
   * @return string <p>The wrapped lines, each one followed by "\n".</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    // A failed preg_split() returns false, which the cast turns into [false].
    $lines = (array)\preg_split('/\\r\\n|\\r|\\n/', $str);

    $wrapped = '';
    foreach ($lines as $line) {
      if ($line === false) {
        continue;
      }

      // Use the class' own wordwrap(): the native function counts bytes and
      // therefore wraps multi-byte text at the wrong column.
      $wrapped .= self::wordwrap($line, $limit) . "\n";
    }

    return $wrapped;
  }
10532
10533
  /**
   * Get the Unicode White Space characters kept in self::$WHITESPACE.
   *
   * @return string[] An array with numeric code point as key and White Space Character as value.
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
10542
10543
}
10544