Passed
Push — master ( 2b7dc8...9b52b9 )
by Lars
03:48
created

UTF8::is_alphanumeric()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Byte-order-mark byte sequence => length of that sequence in bytes.
   *
   * Every raw BOM is listed together with the variant labelled
   * 'as "WINDOWS-1252"', i.e. the same mark spelled with the characters
   * "ï»¿" / "þÿ" / "ÿþ" stored as UTF-8 text.
   *
   * The UTF-8 variant is written with explicit byte escapes: an empty-string
   * key would claim a length of 6 for a zero-length prefix and would be a
   * prefix of every possible input.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" ("ï»¿", one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Creating an instance only triggers the environment detection
   * (see UTF8::checkForSupport()); no instance state is kept.
   */
  public function __construct()
  {
    // populate the static support table (no-op once it has been filled)
    self::checkForSupport();
  }
212
213
  /**
   * Fetch one character by position, similar to $str[1] on a byte string.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the wanted character.</p>
   *
   * @return string The single (multi-byte) character, or an empty string for
   *                empty input or a negative position.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string or before its start
    if ('' === $str || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not return false
   *       is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input, prefixed with the BOM when it was missing.
   */
  public static function add_bom_to_string(string $str): string
  {
    // already marked: leave as-is
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The target length is always "$left + $right + length of $str"; the pad
   * type handed to UTF8::str_pad() depends on which sides are non-zero:
   * only left => STR_PAD_LEFT, only right => STR_PAD_RIGHT, otherwise
   * STR_PAD_BOTH.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has a default: a required parameter after optional ones
    // is deprecated since PHP 8.0 and contradicted the documented default.
    $strlen = self::strlen($str, $encoding);

    // with one side at zero the sum collapses to the other side's amount
    $length = ($left + $right) + $strlen;

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or nothing to pad at all
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
   * Re-keys an array with every key lower- or upper-cased via
   * UTF8::strtolower() / UTF8::strtoupper().
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other
   *                     value is treated like CASE_LOWER.</p>
   *
   * @return array The values of the input under their case-converted keys
   *               (keys that convert to the same string overwrite each other).
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything except an explicit CASE_UPPER means "lower-case"
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $name => $item) {
      $name = $toUpper ? self::strtoupper($name) : self::strtolower($name);
      $converted[$name] = $item;
    }

    return $converted;
  }
317
318
  /**
   * Extracts the text located between the first $start delimiter (searched
   * from $offset) and the next $end delimiter after it.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The enclosed text; an empty string when a delimiter is
   *                missing or nothing lies between the two delimiters.
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // first character right behind the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $slice = self::substr($str, $from, $endPos - $from, $encoding);

    return $slice === false ? '' : $slice;
  }
356
357
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zeros are dropped,
   * the rest is left-padded to a whole number of bytes and every group of
   * 8 bits becomes one byte. Characters other than "0" and "1" are ignored.
   *
   * The conversion works per byte instead of going through base_convert()
   * and pack('H*'): that combination padded an odd number of hex digits on
   * the wrong side ("1010" became "\xa0" instead of "\x0a") and lost
   * precision for long inputs.
   *
   * @param mixed $bin 1|0
   *
   * @return string The decoded bytes; an empty string for non-string input,
   *                empty input or a value of zero.
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) || '' === $bin) {
      return '';
    }

    // keep only binary digits, then drop insignificant leading zeros
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ('' === $bits) {
      return '';
    }

    // left-pad to a multiple of 8 so that the digits split into whole bytes
    $bitCount = \strlen($bits);
    $bits = \str_pad($bits, $bitCount + (8 - $bitCount % 8) % 8, '0', \STR_PAD_LEFT);

    $bytes = '';
    foreach (\str_split($bits, 8) as $octet) {
      $bytes .= \chr((int)\bindec($octet));
    }

    return $bytes;
  }
377
378
  /**
   * Provides the three-byte UTF-8 Byte Order Mark.
   *
   * INFO: UTF8::$BOM lists further marks, e.g. for UTF-16 and UTF-32.
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
389
390
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback to apply.</p>
   * @param string       $str      <p>The string whose characters are passed to the callback.</p>
   *
   * @return string[] Whatever UTF8::chr_map() returns.
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
404
405
  /**
   * Picks the single character located at $index (counting from 0).
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The result of UTF8::substr() for one character, cast to string.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    // the cast turns a non-string result of substr() into a string
    return (string)$char;
  }
418
419
  /**
   * Breaks a string into its individual characters by calling
   * UTF8::str_split() with a chunk length of 1.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    $pieces = self::str_split($str, 1);

    return $pieces;
  }
430
431
  /**
   * Fills UTF8::$SUPPORT with the capabilities reported by the helper
   * detectors (mbstring, iconv, intl, IntlChar, ctype, finfo, json, PCRE).
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       Once the table is marked as checked, further calls do nothing.
   */
  public static function checkForSupport()
  {
    // detection already ran: keep the existing table
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // default: no transliterator ids (replaced below when intl offers them)
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    $hasTransliterator = self::$SUPPORT['intl'] === true
                         &&
                         \function_exists('transliterator_list_ids') === true;
    if ($hasTransliterator) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
480
481
  /**
   * Builds the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding". Code points up to 127
   * come straight from the lookup table loaded via getData('chr'); above
   * that IntlChar::chr() is used when available, otherwise the UTF-8 bytes
   * are assembled by hand from the same table. For a target encoding other
   * than UTF-8 the character is passed through UTF8::encode().
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // memo of characters that were already generated
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // true when the generated UTF-8 character still has to be converted
    $mustConvert = ($encoding !== 'UTF-8');

    if (
        $mustConvert
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $memoKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$memoKey]) === true) {
      return $CHAR_CACHE[$memoKey];
    }

    // use "simple"-char only until "\x80"
    if ($code_point <= 127) {
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      $chr = self::$CHR[$code_point];
      if ($mustConvert) {
        $chr = self::encode($encoding, $chr);
      }

      return $CHAR_CACHE[$memoKey] = $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
      if ($mustConvert) {
        $chr = self::encode($encoding, $chr);
      }

      return $CHAR_CACHE[$memoKey] = $chr;
    }

    // manual fallback: make sure the byte lookup table is loaded
    if (self::$CHR === null) {
      $table = self::getData('chr');
      if ($table) {
        self::$CHR = (array)$table;
      }
    }

    $code_point = (int)$code_point;
    if ($code_point <= 0x7F) {
      // one byte
      $chr = self::$CHR[$code_point];
    } elseif ($code_point <= 0x7FF) {
      // two bytes: 110xxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($mustConvert) {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$memoKey] = $chr;
  }
580
581
  /**
   * Runs a callback over every character of a string: the string is split
   * with UTF8::split() and the pieces are passed to array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
595
596
  /**
   * Lists, character by character, how many bytes each one occupies.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    $characters = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without function overloading the plain strlen() is used
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \array_map('\strlen', $characters);
    }

    // with overloading the byte length comes from UTF8::strlen_in_byte()
    return \array_map(
        static function ($character) {
          return UTF8::strlen_in_byte($character);
        },
        $characters
    );
  }
631
632
  /**
   * Decodes the code point of a UTF-8 character by hand: the lead byte
   * determines the sequence length, the payload bits of the following
   * bytes are appended six at a time.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int The decimal code point.
   */
  public static function chr_to_decimal(string $char): int
  {
    $value = self::ord($char[0]);

    // 0xxxxxxx: the byte is the code point
    if (($value & 0x80) === 0) {
      return $value;
    }

    // an unrecognized lead byte is returned as-is (length stays 1)
    $length = 1;

    if (($value & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value &= ~0xc0;
    } elseif (($value & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value &= ~0xe0;
    } elseif (($value & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value &= ~0xf0;
    }

    // 10xxxxxx: shift in each following byte without its marker bit
    for ($offset = 1; $offset < $length; ++$offset) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
670
671
  /**
   * Formats the code point of a character as prefixed hex (U+xxxx) by
   * combining UTF8::ord() and UTF8::int_to_hex().
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional] <p>Prefix handed to UTF8::int_to_hex().</p>
   *
   * @return string The code point encoded as U+xxxx; an empty string for empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ('' === $char) {
      return '';
    }

    // the NUL entity is looked up like an empty character
    $subject = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($subject);

    return self::int_to_hex($codePoint, $pfix);
  }
691
692
  /**
   * Alias of UTF8::chr_to_decimal(): the argument is forwarded unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int Whatever UTF8::chr_to_decimal() returns for $chr.
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
705
706
  /**
   * Cuts a string into chunks with UTF8::split() and glues the chunks back
   * together with $end between them (nothing is appended after the last chunk).
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
719
720
  /**
   * Strips bytes that are not part of a well-formed UTF-8 sequence and then
   * applies the requested optional clean-up steps, in this order: diamond
   * question mark, invisible characters, whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences; the other two groups
    // catch stray bytes, which vanish because only "$1" is written back
    $wellFormedPattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($wellFormedPattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
788
789
  /**
   * Repairs simple encoding errors with UTF8::fix_simple_utf8() and then
   * runs UTF8::clean() with a fixed set of options.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return string The cleaned string; an empty string for empty input.
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;

    if ('' === $input) {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($input);

    // clean() flags, in parameter order:
    // remove BOM, normalize whitespace, do NOT normalize MS Word chars,
    // keep non-breaking spaces, remove the diamond question mark (�),
    // remove invisible characters (e.g. "\0")
    return self::clean($repaired, true, true, false, true, true, true);
  }
825
826
  /**
   * Translates a string (split into characters first) or an array of
   * strings into code points by mapping UTF8::ord() over the elements.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $elements = (\is_string($arg) === true) ? self::split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $elements);

    // nothing to convert
    if ([] === $points) {
      return [];
    }

    if (!$u_style) {
      return $points;
    }

    // U+xxxx notation: format each integer with UTF8::int_to_hex()
    return \array_map([self::class, 'int_to_hex'], $points);
  }
870
871
  /**
   * Squeezes every run matched by the pattern "[[:space:]]+" into a single
   * space and trims the result with UTF8::trim().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
886
887
  /**
   * Counts how often each character occurs: the string is split into
   * single characters and the pieces are tallied with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string
   *                          (forwarded to UTF8::split()).</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
900
901
  /**
   * Deletes "@media ... { ... }" blocks from a CSS string.
   *
   * @param string $str <p>The CSS source.</p>
   *
   * @return string The CSS without the matched media-query blocks.
   */
  public static function css_stripe_media_queries(string $str): string
  {
    // modifiers: m = multiline, i = case-insensitive, s = dot matches
    // newlines, U = quantifiers are ungreedy by default
    $mediaBlockPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaBlockPattern, '', $str);

    return (string)$stripped;
  }
916
917
  /**
   * Reports whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
927
928
  /**
   * Turns a decimal value into a character by wrapping it in a numeric
   * HTML entity ("&#...;") and decoding that entity.
   *
   * @param mixed $int <p>The decimal value, concatenated into the entity as-is.</p>
   *
   * @return string The result of UTF8::html_entity_decode() for the entity.
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
939
940
  /**
941
   * Encode a string with a new charset-encoding.
942
   *
943
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
944
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
945
   *
946
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
947
   * @param string $str      <p>The input string</p>
948
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
949
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
950
   *
951
   * @return string
952
   */
953 30
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Nothing to convert, or no target encoding requested.
    if ('' === $str || '' === $encoding) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Without $force, a conversion only happens if a source encoding was
    // detected and it differs from the requested target encoding.
    if (
        $force !== true
        &&
        (
            $encodingDetected === false
            ||
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    // The dedicated UTF-8 / ISO-8859-1 converters are used when forced, or when
    // the detected source is one of UTF-8, WINDOWS-1252 or ISO-8859-1.
    $useDedicatedConverter = $force === true
                             || $encodingDetected === 'UTF-8'
                             || $encodingDetected === 'WINDOWS-1252'
                             || $encodingDetected === 'ISO-8859-1';

    if ($useDedicatedConverter === true) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // At this point $encodingDetected is a string whenever $force is false,
    // because the guard above already returned for the "false" case.
    $fromEncoding = ($force === true ? $encoding : $encodingDetected);

    // always fallback via symfony polyfill
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $fromEncoding
    );

    // Strict checks: a loose truthiness test would throw away a valid
    // conversion result such as "0" and hand back the unconverted input.
    if ($strEncoded !== false && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1034
1035
  /**
1036
   * Creates an extract from a sentence; if the search-string is found, it tries to center it in the output.
1037
   *
1038
   * @param string   $str                    <p>The input string.</p>
1039
   * @param string   $search                 <p>The searched string.</p>
1040
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1041
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1042
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1043
   *
1044
   * @return string
1045
   */
1046 1
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters stripped from the cut edges of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // Default extract length: half of the input length.
    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // Strict comparison instead of empty(): "0" is a valid search string.
    if ('' === $search) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      // min() ranks a "not found" false lowest, so $pos ends up as 0
      // (=> whole string is returned) unless both characters are found.
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    // Estimate where the extract has to start so that the match is centered.
    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        // Start at the last space / dot before the estimated position.
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordPos && $halfSide > 0) {
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if (!$pos_end || $pos_end <= 0) {
        // No usable end position: take everything from $pos_start on. The
        // encoding-aware length is reused so it matches the substr() call.
        $strSub = self::substr($str, $pos_start, $realLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      // Match at the very beginning (or not found): extract from the start.
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1156
1157
  /**
1158
   * Reads entire file into a string.
1159
   *
1160
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1161
   *
1162
   * @link http://php.net/manual/en/function.file-get-contents.php
1163
   *
1164
   * @param string        $filename         <p>
1165
   *                                        Name of the file to read.
1166
   *                                        </p>
1167
   * @param bool          $use_include_path [optional] <p>
1168
   *                                        Prior to PHP 5, this parameter is called
1169
   *                                        use_include_path and is a bool.
1170
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1171
   *                                        to trigger include path
1172
   *                                        search.
1173
   *                                        </p>
1174
   * @param resource|null $context          [optional] <p>
1175
   *                                        A valid context resource created with
1176
   *                                        stream_context_create. If you don't need to use a
1177
   *                                        custom context, you can skip this parameter by &null;.
1178
   *                                        </p>
1179
   * @param int|null      $offset           [optional] <p>
1180
   *                                        The offset where the reading starts.
1181
   *                                        </p>
1182
   * @param int|null      $maxLength        [optional] <p>
1183
   *                                        Maximum length of data read. The default is to read until end
1184
   *                                        of file is reached.
1185
   *                                        </p>
1186
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1187
   *
1188
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1189
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1190
   *
1191
   * @return string|false The function returns the read data or false on failure.
1192
   */
1193 11
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failed filter with false; handing that to
    // \file_get_contents() would raise a TypeError under strict_types, so
    // report the failure through the documented "false" return instead.
    if ($filename === false) {
      return false;
    }

    // Only build a default context (carrying the HTTP timeout) when the
    // caller did not supply one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed on when it really is an integer.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1248
1249
  /**
1250
   * Checks if a file starts with BOM (Byte Order Mark) character.
1251
   *
1252
   * @param string $file_path <p>Path to a valid file.</p>
1253
   *
1254
   * @throws \RuntimeException if file_get_contents() returned false
1255
   *
1256
   * @return bool
1257
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
1258
   */
1259 2
  public static function file_has_bom(string $file_path): bool
  {
    $rawContent = \file_get_contents($file_path);

    // Happy path first: delegate the BOM check to the string-based helper.
    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1268
1269
  /**
1270
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1271
   *
1272
   * @param mixed  $var
1273
   * @param int    $normalization_form
1274
   * @param string $leading_combining
1275
   *
1276
   * @return mixed
1277
   */
1278 43
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string is handed back untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Only strings reported as non-ASCII need normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string was already in the requested form.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      if (isset($n[0])) {
        $var = $n;
      } else {
        // Normalization produced nothing usable: re-encode to UTF-8 instead.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1332
1333
  /**
1334
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1335
   *
1336
   * Gets a specific external variable by name and optionally filters it
1337
   *
1338
   * @link  http://php.net/manual/en/function.filter-input.php
1339
   *
1340
   * @param int    $type          <p>
1341
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1342
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1343
   *                              <b>INPUT_ENV</b>.
1344
   *                              </p>
1345
   * @param string $variable_name <p>
1346
   *                              Name of a variable to get.
1347
   *                              </p>
1348
   * @param int    $filter        [optional] <p>
1349
   *                              The ID of the filter to apply. The
1350
   *                              manual page lists the available filters.
1351
   *                              </p>
1352
   * @param mixed  $options       [optional] <p>
1353
   *                              Associative array of options or bitwise disjunction of flags. If filter
1354
   *                              accepts options, flags can be provided in "flags" field of array.
1355
   *                              </p>
1356
   *
1357
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1358
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1359
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1360
   */
1361
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only if the caller passed it.
    $optionsGiven = \func_num_args() >= 4;

    $var = $optionsGiven
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($var);
  }
1371
1372
  /**
1373
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1374
   *
1375
   * Gets external variables and optionally filters them
1376
   *
1377
   * @link  http://php.net/manual/en/function.filter-input-array.php
1378
   *
1379
   * @param int   $type       <p>
1380
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1381
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1382
   *                          <b>INPUT_ENV</b>.
1383
   *                          </p>
1384
   * @param mixed $definition [optional] <p>
1385
   *                          An array defining the arguments. A valid key is a string
1386
   *                          containing a variable name and a valid value is either a filter type, or an array
1387
   *                          optionally specifying the filter, flags and options. If the value is an
1388
   *                          array, valid keys are filter which specifies the
1389
   *                          filter type,
1390
   *                          flags which specifies any flags that apply to the
1391
   *                          filter, and options which specifies any options that
1392
   *                          apply to the filter. See the example below for a better understanding.
1393
   *                          </p>
1394
   *                          <p>
1395
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1396
   *                          input array are filtered by this filter.
1397
   *                          </p>
1398
   * @param bool  $add_empty  [optional] <p>
1399
   *                          Add missing keys as <b>NULL</b> to the return value.
1400
   *                          </p>
1401
   *
1402
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1403
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1404
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1405
   *               set and <b>NULL</b> if the filter fails.
1406
   */
1407
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, rely on the native function's own defaults.
    $definitionGiven = \func_num_args() >= 2;

    $a = $definitionGiven
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($a);
  }
1417
1418
  /**
1419
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1420
   *
1421
   * Filters a variable with a specified filter
1422
   *
1423
   * @link  http://php.net/manual/en/function.filter-var.php
1424
   *
1425
   * @param mixed $variable <p>
1426
   *                        Value to filter.
1427
   *                        </p>
1428
   * @param int   $filter   [optional] <p>
1429
   *                        The ID of the filter to apply. The
1430
   *                        manual page lists the available filters.
1431
   *                        </p>
1432
   * @param mixed $options  [optional] <p>
1433
   *                        Associative array of options or bitwise disjunction of flags. If filter
1434
   *                        accepts options, flags can be provided in "flags" field of array. For
1435
   *                        the "callback" filter, callable type should be passed. The
1436
   *                        callback must accept one argument, the value to be filtered, and return
1437
   *                        the value after filtering/sanitizing it.
1438
   *                        </p>
1439
   *                        <p>
1440
   *                        <code>
1441
   *                        // for filters that accept options, use this format
1442
   *                        $options = array(
1443
   *                        'options' => array(
1444
   *                        'default' => 3, // value to return if the filter fails
1445
   *                        // other options here
1446
   *                        'min_range' => 0
1447
   *                        ),
1448
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1449
   *                        );
1450
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1451
   *                        // for filter that only accept flags, you can pass them directly
1452
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1453
   *                        // for filter that only accept flags, you can also pass as an array
1454
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1455
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1456
   *                        // callback validate filter
1457
   *                        function foo($value)
1458
   *                        {
1459
   *                        // Expected format: Surname, GivenNames
1460
   *                        if (strpos($value, ", ") === false) return false;
1461
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1462
   *                        $empty = (empty($surname) || empty($givennames));
1463
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1464
   *                        if ($empty || $notstrings) {
1465
   *                        return false;
1466
   *                        } else {
1467
   *                        return $value;
1468
   *                        }
1469
   *                        }
1470
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1471
   *                        </code>
1472
   *                        </p>
1473
   *
1474
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1475
   */
1476 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only if the caller passed it.
    $optionsGiven = \func_num_args() >= 3;

    $filtered = $optionsGiven
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1486
1487
  /**
1488
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1489
   *
1490
   * Gets multiple variables and optionally filters them
1491
   *
1492
   * @link  http://php.net/manual/en/function.filter-var-array.php
1493
   *
1494
   * @param array $data       <p>
1495
   *                          An array with string keys containing the data to filter.
1496
   *                          </p>
1497
   * @param mixed $definition [optional] <p>
1498
   *                          An array defining the arguments. A valid key is a string
1499
   *                          containing a variable name and a valid value is either a
1500
   *                          filter type, or an
1501
   *                          array optionally specifying the filter, flags and options.
1502
   *                          If the value is an array, valid keys are filter
1503
   *                          which specifies the filter type,
1504
   *                          flags which specifies any flags that apply to the
1505
   *                          filter, and options which specifies any options that
1506
   *                          apply to the filter. See the example below for a better understanding.
1507
   *                          </p>
1508
   *                          <p>
1509
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1510
   *                          input array are filtered by this filter.
1511
   *                          </p>
1512
   * @param bool  $add_empty  [optional] <p>
1513
   *                          Add missing keys as <b>NULL</b> to the return value.
1514
   *                          </p>
1515
   *
1516
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1517
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1518
   */
1519 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, rely on the native function's own defaults.
    $definitionGiven = \func_num_args() >= 2;

    $a = $definitionGiven
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    return self::filter($a);
  }
1529
1530
  /**
1531
   * Checks whether finfo is available on the server.
1532
   *
1533
   * @return bool
1534
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1535
   */
1536
  public static function finfo_loaded(): bool
  {
    // The fileinfo extension is considered available if its "finfo" class exists.
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1540
1541
  /**
1542
   * Returns the first $n characters of the string.
1543
   *
1544
   * @param string $str      <p>The input string.</p>
1545
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1546
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1547
   *
1548
   * @return string
1549
   */
1550 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A non-positive count never yields any characters.
    if ($n <= 0) {
      return '';
    }

    $leadingChars = self::substr($str, 0, $n, $encoding);

    // substr() signals failure with false; map that to an empty string.
    return $leadingChars === false ? '' : $leadingChars;
  }
1563
1564
  /**
1565
   * Check if the number of unicode characters are not more than the specified integer.
1566
   *
1567
   * @param string $str      The original string to be checked.
1568
   * @param int    $box_size The size in number of chars to be checked against string.
1569
   *
1570
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1571
   */
1572 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Compare the length reported by self::strlen() with the box size.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1576
1577
  /**
1578
   * @param string $str
1579
   * @param bool   $useLower <p>Use uppercase by default, otherwise use lowercase.</p>
1580
   *
1581
   * @return string
1582
   */
1583 22
  private static function fixStrCaseHelper(string $str, $useLower = false): string
  {
    // Single-entry mapping between the capital and the small sharp s.
    $upper = ['ẞ'];
    $lower = ['ß'];

    // Only a strict "true" switches to lowercase; anything else uppercases.
    if ($useLower === true) {
      return \str_replace($upper, $lower, $str);
    }

    return \str_replace($lower, $upper, $str);
  }
1609
1610
  /**
1611
   * Try to fix simple broken UTF-8 strings.
1612
   *
1613
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1614
   *
1615
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
1616
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1617
   * See: http://en.wikipedia.org/wiki/Windows-1252
1618
   *
1619
   * @param string $str <p>The input string</p>
1620
   *
1621
   * @return string
1622
   */
1623 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replace lists are derived once and kept for later calls.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // Load the fix table on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $fixTable = self::$BROKEN_UTF8_FIX;

      $brokenSequences = \array_keys($fixTable);
      $fixedSequences = \array_values($fixTable);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1644
1645
  /**
   * Repair a string that was UTF-8 encoded twice (or more often).
   *
   * Arrays are processed element by element (recursively), keys are kept.
   * A single value is cast to string and then run through
   * "UTF8::utf8_decode()" + "UTF8::to_utf8()" until the result stops changing.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str)) {
      foreach (\array_keys($str) as $key) {
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    $current = (string)$str;

    // nothing to repair in an empty string
    if ('' === $current) {
      return $current;
    }

    // repeat the round trip until a pass leaves the string untouched
    do {
      $previous = $current;
      $current = self::to_utf8(
          self::utf8_decode($current, true)
      );
    } while ($previous !== $current);

    return $current;
  }
1675
1676
  /**
   * Get the text direction of a specific character.
   *
   * If the "intlChar" support flag is set, "\IntlChar::charDirection()" is asked first.
   * When its answer is in neither of the known LTR / RTL code lists (or the flag is
   * not set), the code point is checked against hard-coded right-to-left ranges.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of [0x5be, 0x10b7f] is treated as left-to-right
    $insideRtlWindow = (0x5be <= $c && 0x10b7f >= $c);
    if (!$insideRtlWindow) {
      return 'LTR';
    }

    // Table entries: a plain int is one exact code point (compared with ===),
    // a two-element array is an inclusive [from, to] range.
    if (0x85e >= $c) {

      $rtlTable = [
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          0x608,
          0x60b,
          0x60d,
          0x61b,
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          0x710,
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          0x7b1,
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          0x7fa,
          [0x800, 0x815],
          0x81a,
          0x824,
          0x828,
          [0x830, 0x83e],
          [0x840, 0x858],
          0x85e,
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlTable = [
          0xfb1d,
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          0xfb3e,
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          0x10808,
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          0x1083c,
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          0x1093f,
          0x10a00,
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two tables and not U+200F
      return 'LTR';

    }

    foreach ($rtlTable as $entry) {
      if (\is_int($entry)) {
        if ($entry === $c) {
          return 'RTL';
        }
      } elseif ($entry[0] <= $c && $entry[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1798
1799
  /**
   * Get data from "/data/*.php".
   *
   * The file "<directory of this class>/data/<$file>.php" is loaded via "require"
   * and whatever that file returns is handed back to the caller.
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return mixed|false Will return false if the file does not exist.
   */
  private static function getData(string $file)
  {
    // $file is reused on purpose, so the required file sees the same variable as before
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1816
1817
  /**
   * Check for php-support.
   *
   * Runs "self::checkForSupport()" first, if the support table was not filled yet.
   *
   * @param string|null $key
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key requested: hand back the whole table
    if (null === $key) {
      return self::$SUPPORT;
    }

    // unknown keys (and entries holding null) result in null
    return self::$SUPPORT[$key] ?? null;
  }
1843
1844
  /**
   * Build a random string by picking single characters out of a pool of possible characters.
   *
   * An empty pool results in an empty string. "random_int()" is used for picking,
   * with "mt_rand()" as fallback if "random_int()" throws an exception.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    // append one randomly picked char per round
    for ($picked = 0; $picked < $length; ++$picked) {
      try {
        $index = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1877
1878
  /**
   * Create a unique identifier via "uniqid()" with more entropy.
   *
   * The prefix for "uniqid()" is built from "mt_rand()", the session-id, the
   * remote- and server-address (if available in $_SERVER) and the extra entropy.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $seedParts = [
        \mt_rand(),
        \session_id(),
        $_SERVER['REMOTE_ADDR'] ?? '',
        $_SERVER['SERVER_ADDR'] ?? '',
        $entropyExtra,
    ];
    $seed = \implode('', $seedParts);

    $unique = \uniqid($seed, true);

    return $md5 ? \md5($unique . $seed) : $unique;
  }
1900
1901
  /**
   * Alias for "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $containsBom = self::string_has_bom($str);

    return $containsBom;
  }
1916
1917
  /**
   * Check if the string contains at least one lower case char.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a "[[:lower:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $lowerCasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowerCasePattern);
  }
1928
1929
  /**
   * Check if the string contains at least one upper case char.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a "[[:upper:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $upperCasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $upperCasePattern);
  }
1940
1941
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted via "hexdec()" and then passed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1952
1953
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted (case-insensitive) forms: "xxxx", "\uxxxx" and "U+xxxx" with 4 to 6
   * hexadecimal digits. Everything else, including strings with non-hexadecimal
   * letters, results in false.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Only real hex digits are allowed: with "[a-z0-9]" an input like "zzzz" passed the
    // check and intval() then returned 0 (or a partial value) instead of a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1977
1978
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1993
1994
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" is available it does the work, otherwise every
   * char is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // with $keepAsciiChars the conversion starts above the ASCII range
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap consists of groups of exactly four values: [start, end, offset, mask].
      // A stray fifth element was silently ignored by PHP 7 but is rejected by PHP 8.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without mbstring: encode char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2040
2041
  /**
2042
   * UTF-8 version of html_entity_decode()
2043
   *
2044
   * The reason we are not using html_entity_decode() by itself is because
2045
   * while it is not technically correct to leave out the semicolon
2046
   * at the end of an entity most browsers will still interpret the entity
2047
   * correctly. html_entity_decode() does not convert entities without
2048
   * semicolons, so we are left with our own little solution here. Bummer.
2049
   *
2050
   * Convert all HTML entities to their applicable characters
2051
   *
2052
   * INFO: opposite to UTF8::html_encode()
2053
   *
2054
   * @link http://php.net/manual/en/function.html-entity-decode.php
2055
   *
2056
   * @param string $str      <p>
2057
   *                         The input string.
2058
   *                         </p>
2059
   * @param int    $flags    [optional] <p>
2060
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2061
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2062
   *                         <table>
2063
   *                         Available <i>flags</i> constants
2064
   *                         <tr valign="top">
2065
   *                         <td>Constant Name</td>
2066
   *                         <td>Description</td>
2067
   *                         </tr>
2068
   *                         <tr valign="top">
2069
   *                         <td><b>ENT_COMPAT</b></td>
2070
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2071
   *                         </tr>
2072
   *                         <tr valign="top">
2073
   *                         <td><b>ENT_QUOTES</b></td>
2074
   *                         <td>Will convert both double and single quotes.</td>
2075
   *                         </tr>
2076
   *                         <tr valign="top">
2077
   *                         <td><b>ENT_NOQUOTES</b></td>
2078
   *                         <td>Will leave both double and single quotes unconverted.</td>
2079
   *                         </tr>
2080
   *                         <tr valign="top">
2081
   *                         <td><b>ENT_HTML401</b></td>
2082
   *                         <td>
2083
   *                         Handle code as HTML 4.01.
2084
   *                         </td>
2085
   *                         </tr>
2086
   *                         <tr valign="top">
2087
   *                         <td><b>ENT_XML1</b></td>
2088
   *                         <td>
2089
   *                         Handle code as XML 1.
2090
   *                         </td>
2091
   *                         </tr>
2092
   *                         <tr valign="top">
2093
   *                         <td><b>ENT_XHTML</b></td>
2094
   *                         <td>
2095
   *                         Handle code as XHTML.
2096
   *                         </td>
2097
   *                         </tr>
2098
   *                         <tr valign="top">
2099
   *                         <td><b>ENT_HTML5</b></td>
2100
   *                         <td>
2101
   *                         Handle code as HTML 5.
2102
   *                         </td>
2103
   *                         </tr>
2104
   *                         </table>
2105
   *                         </p>
2106
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2107
   *
2108
   * @return string The decoded string.
2109
   */
2110 38
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // shorter than 4 bytes: nothing is decoded
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // no "&" at all, or neither "&#" nor ";" in the string: nothing to decode
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // make sure the support table is filled before it is read
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // repeat until a pass changes nothing, so nested entities (e.g. "&amp;lt;") are resolved too
    do {
      $str_compare = $str;

      $decodedNumeric = \preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            // always fallback via symfony polyfill
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            // quotes stay encoded here, they are handled by html_entity_decode() + $flags below
            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // preg_replace_callback() returns null on a PCRE error: keep the current string then
      if ($decodedNumeric !== null) {
        $str = $decodedNumeric;
      }

      // add the missing semicolon to numeric entities, so html_entity_decode() can handle them
      $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // preg_replace() returns null on a PCRE error: fall back to the unmodified string
      if ($withSemicolons === null) {
        $withSemicolons = $str;
      }

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $withSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2181
2182
  /**
   * Create a escape html version of the string via "UTF8::htmlspecialchars()".
   *
   * The flags "ENT_QUOTES | ENT_SUBSTITUTE" are always used.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2198
2199
  /**
   * Remove empty html-tags, including tags that only contain whitespace.
   *
   * e.g.: <tag></tag>
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    // opening tag + optional whitespace + closing tag
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2216
2217
  /**
2218
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2219
   *
2220
   * @link http://php.net/manual/en/function.htmlentities.php
2221
   *
2222
   * @param string $str           <p>
2223
   *                              The input string.
2224
   *                              </p>
2225
   * @param int    $flags         [optional] <p>
2226
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2227
   *                              invalid code unit sequences and the used document type. The default is
2228
   *                              ENT_COMPAT | ENT_HTML401.
2229
   *                              <table>
2230
   *                              Available <i>flags</i> constants
2231
   *                              <tr valign="top">
2232
   *                              <td>Constant Name</td>
2233
   *                              <td>Description</td>
2234
   *                              </tr>
2235
   *                              <tr valign="top">
2236
   *                              <td><b>ENT_COMPAT</b></td>
2237
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2238
   *                              </tr>
2239
   *                              <tr valign="top">
2240
   *                              <td><b>ENT_QUOTES</b></td>
2241
   *                              <td>Will convert both double and single quotes.</td>
2242
   *                              </tr>
2243
   *                              <tr valign="top">
2244
   *                              <td><b>ENT_NOQUOTES</b></td>
2245
   *                              <td>Will leave both double and single quotes unconverted.</td>
2246
   *                              </tr>
2247
   *                              <tr valign="top">
2248
   *                              <td><b>ENT_IGNORE</b></td>
2249
   *                              <td>
2250
   *                              Silently discard invalid code unit sequences instead of returning
2251
   *                              an empty string. Using this flag is discouraged as it
2252
   *                              may have security implications.
2253
   *                              </td>
2254
   *                              </tr>
2255
   *                              <tr valign="top">
2256
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2257
   *                              <td>
2258
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2259
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2260
   *                              </td>
2261
   *                              </tr>
2262
   *                              <tr valign="top">
2263
   *                              <td><b>ENT_DISALLOWED</b></td>
2264
   *                              <td>
2265
   *                              Replace invalid code points for the given document type with a
2266
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2267
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2268
   *                              instance, to ensure the well-formedness of XML documents with
2269
   *                              embedded external content.
2270
   *                              </td>
2271
   *                              </tr>
2272
   *                              <tr valign="top">
2273
   *                              <td><b>ENT_HTML401</b></td>
2274
   *                              <td>
2275
   *                              Handle code as HTML 4.01.
2276
   *                              </td>
2277
   *                              </tr>
2278
   *                              <tr valign="top">
2279
   *                              <td><b>ENT_XML1</b></td>
2280
   *                              <td>
2281
   *                              Handle code as XML 1.
2282
   *                              </td>
2283
   *                              </tr>
2284
   *                              <tr valign="top">
2285
   *                              <td><b>ENT_XHTML</b></td>
2286
   *                              <td>
2287
   *                              Handle code as XHTML.
2288
   *                              </td>
2289
   *                              </tr>
2290
   *                              <tr valign="top">
2291
   *                              <td><b>ENT_HTML5</b></td>
2292
   *                              <td>
2293
   *                              Handle code as HTML 5.
2294
   *                              </td>
2295
   *                              </tr>
2296
   *                              </table>
2297
   *                              </p>
2298
   * @param string $encoding      [optional] <p>
2299
   *                              Like <b>htmlspecialchars</b>,
2300
   *                              <b>htmlentities</b> takes an optional third argument
2301
   *                              <i>encoding</i> which defines encoding used in
2302
   *                              conversion.
2303
   *                              Although this argument is technically optional, you are highly
2304
   *                              encouraged to specify the correct value for your code.
2305
   *                              </p>
2306
   * @param bool   $double_encode [optional] <p>
2307
   *                              When <i>double_encode</i> is turned off PHP will not
2308
   *                              encode existing html entities. The default is to convert everything.
2309
   *                              </p>
2310
   *
2311
   *
2312
   * @return string The encoded string.
2313
   * </p>
2314
   * <p>
2315
   * If the input <i>string</i> contains an invalid code unit
2316
   * sequence within the given <i>encoding</i> an empty string
2317
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2318
   * <b>ENT_SUBSTITUTE</b> flags are set.
2319
   */
2320 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $needsNormalizing = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
    if ($needsNormalizing) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * The native htmlentities() call above leaves backslashes untouched, so every
     * backslash is replaced by its numeric html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // finally encode the remaining non-ASCII chars as numbered entities
    return self::html_encode($encoded, true, $encoding);
  }
2340
2341
  /**
2342
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2343
   *
2344
   * INFO: Take a look at "UTF8::htmlentities()"
2345
   *
2346
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2347
   *
2348
   * @param string $str           <p>
2349
   *                              The string being converted.
2350
   *                              </p>
2351
   * @param int    $flags         [optional] <p>
2352
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2353
   *                              invalid code unit sequences and the used document type. The default is
2354
   *                              ENT_COMPAT | ENT_HTML401.
2355
   *                              <table>
2356
   *                              Available <i>flags</i> constants
2357
   *                              <tr valign="top">
2358
   *                              <td>Constant Name</td>
2359
   *                              <td>Description</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_COMPAT</b></td>
2363
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2364
   *                              </tr>
2365
   *                              <tr valign="top">
2366
   *                              <td><b>ENT_QUOTES</b></td>
2367
   *                              <td>Will convert both double and single quotes.</td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_NOQUOTES</b></td>
2371
   *                              <td>Will leave both double and single quotes unconverted.</td>
2372
   *                              </tr>
2373
   *                              <tr valign="top">
2374
   *                              <td><b>ENT_IGNORE</b></td>
2375
   *                              <td>
2376
   *                              Silently discard invalid code unit sequences instead of returning
2377
   *                              an empty string. Using this flag is discouraged as it
2378
   *                              may have security implications.
2379
   *                              </td>
2380
   *                              </tr>
2381
   *                              <tr valign="top">
2382
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2383
   *                              <td>
2384
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2385
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2386
   *                              </td>
2387
   *                              </tr>
2388
   *                              <tr valign="top">
2389
   *                              <td><b>ENT_DISALLOWED</b></td>
2390
   *                              <td>
2391
   *                              Replace invalid code points for the given document type with a
2392
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2393
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2394
   *                              instance, to ensure the well-formedness of XML documents with
2395
   *                              embedded external content.
2396
   *                              </td>
2397
   *                              </tr>
2398
   *                              <tr valign="top">
2399
   *                              <td><b>ENT_HTML401</b></td>
2400
   *                              <td>
2401
   *                              Handle code as HTML 4.01.
2402
   *                              </td>
2403
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_XML1</b></td>
2406
   *                              <td>
2407
   *                              Handle code as XML 1.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              <tr valign="top">
2411
   *                              <td><b>ENT_XHTML</b></td>
2412
   *                              <td>
2413
   *                              Handle code as XHTML.
2414
   *                              </td>
2415
   *                              </tr>
2416
   *                              <tr valign="top">
2417
   *                              <td><b>ENT_HTML5</b></td>
2418
   *                              <td>
2419
   *                              Handle code as HTML 5.
2420
   *                              </td>
2421
   *                              </tr>
2422
   *                              </table>
2423
   *                              </p>
2424
   * @param string $encoding      [optional] <p>
2425
   *                              Defines encoding used in conversion.
2426
   *                              </p>
2427
   *                              <p>
2428
   *                              For the purposes of this function, the encodings
2429
   *                              ISO-8859-1, ISO-8859-15,
2430
   *                              UTF-8, cp866,
2431
   *                              cp1251, cp1252, and
2432
   *                              KOI8-R are effectively equivalent, provided the
2433
   *                              <i>string</i> itself is valid for the encoding, as
2434
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2435
   *                              the same positions in all of these encodings.
2436
   *                              </p>
2437
   * @param bool   $double_encode [optional] <p>
2438
   *                              When <i>double_encode</i> is turned off PHP will not
2439
   *                              encode existing html entities, the default is to convert everything.
2440
   *                              </p>
2441
   *
2442
   * @return string The converted string.
2443
   * </p>
2444
   * <p>
2445
   * If the input <i>string</i> contains an invalid code unit
2446
   * sequence within the given <i>encoding</i> an empty string
2447
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2448
   * <b>ENT_SUBSTITUTE</b> flags are set.
2449
   */
2450 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are handed to PHP untouched, every other value is normalized first
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2458
2459
  /**
   * Checks whether the "iconv" extension is available on the server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already answers with a bool
    return \extension_loaded('iconv');
  }
2469
2470
  /**
   * Alias of "UTF8::decimal_to_chr()".
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Passed on unchanged to decimal_to_chr().</p>
   *
   * @return string <p>The result of decimal_to_chr() for the given value.</p>
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2483
2484
  /**
   * Converts an integer into its hexadecimal "U+xxxx" code point notation.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to a hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Text that is put in front of the hex digits.</p>
   *
   * @return string <p>The prefix followed by the hex digits, zero-padded to at least four digits.</p>
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // left-pad short values with zeros, longer values are kept as they are
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2502
2503
  /**
   * Checks whether the "IntlChar" class is available on the server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2513
2514
  /**
   * Checks whether the "intl" extension is available on the server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2524
2525
  /**
   * Alias of "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>The result of is_ascii() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2540
2541
  /**
   * Alias of "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of is_base64() for the given value.</p>
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str): bool
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2556
2557
  /**
   * Alias of "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str    <p>Passed on unchanged to is_binary().</p>
   * @param bool  $strict <p>Passed on unchanged to is_binary().</p>
   *
   * @return bool <p>The result of is_binary() for the given arguments.</p>
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    $isBinary = self::is_binary($str, $strict);

    return $isBinary;
  }
2573
2574
  /**
   * Alias of "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string that is compared against the known byte order marks.</p>
   *
   * @return bool <p>The result of is_bom() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2589
2590
  /**
   * Alias of "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of is_html() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2605
2606
  /**
   * Alias of "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of is_json() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    $isJson = self::is_json($str);

    return $isJson;
  }
2621
2622
  /**
   * Alias of "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param mixed $str <p>Passed on unchanged to is_utf16().</p>
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
2640
2641
  /**
   * Alias of "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param mixed $str <p>Passed on unchanged to is_utf32().</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
2659
2660
  /**
   * Alias of "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str    <p>Passed on unchanged to is_utf8().</p>
   * @param bool   $strict <p>Passed on unchanged to is_utf8().</p>
   *
   * @return bool <p>The result of is_utf8() for the given arguments.</p>
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2676
2677
  /**
   * Tests the string against the pattern "^[[:alpha:]]*$" (alphabetic chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphabetic chars.
   */
  public static function is_alpha(string $str): bool
  {
    $alphaOnly = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnly);
  }
2689
2690
  /**
   * Tests the string against the pattern "^[[:alnum:]]*$" (alphabetic and numeric chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphanumeric chars.
   */
  public static function is_alphanumeric(string $str): bool
  {
    $alnumOnly = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnly);
  }
2702
2703
  /**
   * Checks if a string consists only of printable 7 bit ASCII chars and common whitespace.
   *
   * Accepted bytes are TAB (0x09), LF (0x0A), CR (0x0D) and the range 0x20 - 0x7E.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if it is ASCII (the empty string included)<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    if ('' === $str) {
      return true;
    }

    // The former character class additionally listed \x10 and \x13 (DLE / DC3), which look like
    // the decimal codes of LF / CR written as hex; LF and CR are already covered by \x0A / \x0D,
    // so those two control bytes are no longer let through.
    return !\preg_match('/[^\x09\x0A\x0D\x20-\x7E]/', $str);
  }
2721
2722
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and encoded again; it only counts as base64
   * if that round trip reproduces the input exactly.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is base64 encoded (always false for "" and non-strings).
   */
  public static function is_base64($str): bool
  {
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare explicitly instead of relying on truthiness: a payload of "0" (encoded "MA==")
    // is falsy in PHP and was wrongly rejected before.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
2743
2744
  /**
   * Guesses a file type from the first two bytes ("magic number") of the given data.
   *
   * @param string $str <p>The raw data to inspect.</p>
   *
   * @return string[]
   *                  <p>Array with the keys "ext" and "type":<br>
   *                  both "" if the input has fewer than two bytes,<br>
   *                  both "???" if the signature is not known,<br>
   *                  otherwise the extension and the type "binary".</p>
   */
  private static function get_file_type($str)
  {
    if ('' === $str) {
      return ['ext' => '', 'type' => ''];
    }

    $signature = \substr($str, 0, 2);
    if (\strlen($signature) !== 2) {
      return ['ext' => '', 'type' => ''];
    }

    // The raw bytes are compared directly. Joining the decimal byte values into one number
    // (as done before) is ambiguous: the bytes 0x07 0xAD give "7" . "173" = 7173, which is
    // also the code of "GI" (71, 73), so such data was reported as gif.
    $knownSignatures = [
        '%P'       => 'pdf',  // 37, 80
        'MZ'       => 'exe',  // 77, 90
        'MT'       => 'midi', // 77, 84
        'PK'       => 'zip',  // 80, 75
        'Ra'       => 'rar',  // 82, 97
        "\xFF\xD8" => 'jpg',  // 255, 216
        'GI'       => 'gif',  // 71, 73
        'BM'       => 'bmp',  // 66, 77
        "\x89P"    => 'png',  // 137, 80
    ];

    if (isset($knownSignatures[$signature])) {
      return ['ext' => $knownSignatures[$signature], 'type' => 'binary'];
    }

    return ['ext' => '???', 'type' => '???'];
  }
2811
2812
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The checks run in this order and the first hit wins:
   * a string of "0" / "1" chars only, (strict mode) "binary" reported by finfo,
   * a known file signature, and finally a share of NUL bytes above 0.256.
   *
   * @param mixed $input  <p>The value to check, it is cast to string first.</p>
   * @param bool  $strict <p>Also ask ext-fileinfo for the encoding of the data.</p>
   *
   * @throws \RuntimeException if $strict is used and ext-fileinfo is reported as missing
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // only "0" and "1" chars
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $detector = new \finfo(FILEINFO_MIME_ENCODING);
      if ($detector->buffer($data) === 'binary') {
        return true;
      }

    }

    $fileType = self::get_file_type($data);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $length = \strlen($data);
    if ($length === 0) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $nullBytes = \substr_count($data, "\x0"); // will use "mb_substr_count()" ...
    } else {
      $nullBytes = \substr_count($data, "\x0", 0, $length);
    }

    return ($nullBytes / $length) > 0.256;
  }
2874
2875
  /**
   * Check if the file is binary, based on its first 512 bytes and a strict is_binary() check.
   *
   * @param string $file <p>The path that is opened for reading.</p>
   *
   * @return bool <p>false if the file could not be opened or nothing was read.</p>
   */
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');
    if (\is_resource($handle) === false) {
      return false;
    }

    // the handle is only needed for this single read
    $head = \fread($handle, 512);
    \fclose($handle);

    if ($head === '') {
      return false;
    }

    return self::is_binary($head, true);
  }
2899
2900
  /**
   * Tests the string against the pattern "^[[:space:]]*$" (whitespace chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only whitespace characters.
   */
  public static function is_blank(string $str): bool
  {
    $spaceOnly = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $spaceOnly);
  }
2912
2913
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is identical to one of the keys of the BOM table,
   *              <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    // the byte order marks are the keys of the table, the values (byte lengths) are not needed
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2933
2934
  /**
   * Determine whether the value is considered to be empty.
   *
   * A value counts as empty if it equals FALSE when converted to bool,
   * e.g. "", "0", 0, null, false or an empty array.
   *
   * @param mixed $str
   *
   * @return bool Whether or not $str is empty().
   */
  public static function is_empty($str): bool
  {
    // the parameter always exists, so empty() comes down to a negated bool conversion
    return !$str;
  }
2948
2949
  /**
   * Tests the string against the pattern "^[[:xdigit:]]*$" (hexadecimal chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only hexadecimal chars.
   */
  public static function is_hexadecimal(string $str): bool
  {
    $hexOnly = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnly);
  }
2961
2962
  /**
   * Check if the string contains anything that looks like an html-tag, e.g. "<b>" or "<br/>".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Always false for the empty string.</p>
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // one opening or closing tag, optionally with attributes, is enough
    $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2982
2983
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that decodes to an object or an array without a json error is accepted.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException if ext-json is reported as missing
   *
   * @return bool <p>Always false for the empty string.</p>
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    $isContainer = \is_object($decoded) === true || \is_array($decoded) === true;
    if ($isContainer === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3015
3016
  /**
   * Tests the string against the pattern "^[[:lower:]]*$" (lower case chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only lower case characters.
   */
  public static function is_lowercase(string $str): bool
  {
    $lowerOnly = '^[[:lower:]]*$';

    return (bool)self::str_matches_pattern($str, $lowerOnly);
  }
3029
3030
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized (always false for the empty string).
   */
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // "b:0;" is the serialized form of false, which can't be told apart from a failure below
    if ($str === 'b:0;') {
      return true;
    }

    // This is only a format check, so no class from the input is allowed to be instantiated:
    // with "allowed_classes => false" objects become __PHP_Incomplete_Class and no magic
    // methods of arbitrary classes are triggered by untrusted input.
    // The silence operator stays, because unserialize() raises a notice for invalid data.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3049
3050
  /**
   * Tests the string against the pattern "^[[:upper:]]*$" (upper case chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    $upperOnly = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $upperOnly);
  }
3063
3064
  /**
   * Check if the string is UTF-16.
   *
   * Only input that is_binary() accepts is examined. For both byte orders the input is
   * converted to UTF-8 and back; if that round trip is stable, a score is counted from
   * the chars of the result. The byte order with the higher score wins.
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16 (or both byte orders score the same),<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    // init
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before the feature detection ran, so make sure
    // the support table is filled before it is read (same guard as in the other methods)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // char statistic of the input, computed lazily and shared by both passes
    $strChars = [];
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // the conversion must survive a round trip unchanged
      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
3136
3137
  /**
   * Check if the string is UTF-32.
   *
   * Only input that is_binary() accepts is examined. For both byte orders the input is
   * converted to UTF-8 and back; if that round trip is stable, a score is counted from
   * the chars of the result. The byte order with the higher score wins.
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32 (or both byte orders score the same),<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    // init
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    // is_binary() can return true before the feature detection ran, so make sure
    // the support table is filled before it is read (same guard as in the other methods)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // char statistic of the input, computed lazily and shared by both passes
    $strChars = [];
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // the conversion must survive a round trip unchanged
      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3209
3210
  /**
3211
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3212
   *
3213
   * @see    http://hsivonen.iki.fi/php-utf8/
3214
   *
3215
   * @param string|string[] $str    <p>The string to be checked.</p>
3216
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3217
   *
3218
   * @return bool
3219
   */
3220 107
  public static function is_utf8($str, bool $strict = false): bool
3221
  {
3222 107
    if (\is_array($str) === true) {
3223 2
      foreach ($str as $k => $v) {
3224 2
        if (false === self::is_utf8($v, $strict)) {
3225 2
          return false;
3226
        }
3227
      }
3228
3229
      return true;
3230
    }
3231
3232 107
    if ('' === $str) {
3233 12
      return true;
3234
    }
3235
3236 103
    if ($strict === true) {
3237 2
      if (self::is_utf16($str) !== false) {
3238 2
        return false;
3239
      }
3240
3241
      if (self::is_utf32($str) !== false) {
3242
        return false;
3243
      }
3244
    }
3245
3246 103
    if (self::pcre_utf8_support() !== true) {
3247
3248
      // If even just the first character can be matched, when the /u
3249
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3250
      // invalid, nothing at all will match, even if the string contains
3251
      // some valid sequences
3252
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3253
    }
3254
3255 103
    $mState = 0; // cached expected number of octets after the current octet
3256
    // until the beginning of the next UTF8 character sequence
3257 103
    $mUcs4 = 0; // cached Unicode character
3258 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3259
3260 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3261
      self::checkForSupport();
3262
    }
3263
3264 103
    if (self::$ORD === null) {
3265
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3266
    }
3267
3268 103
    $len = self::strlen_in_byte((string)$str);
3269
    /** @noinspection ForeachInvariantsInspection */
3270 103
    for ($i = 0; $i < $len; $i++) {
3271 103
      $in = self::$ORD[$str[$i]];
3272 103
      if ($mState === 0) {
3273
        // When mState is zero we expect either a US-ASCII character or a
3274
        // multi-octet sequence.
3275 103
        if (0 === (0x80 & $in)) {
3276
          // US-ASCII, pass straight through.
3277 98
          $mBytes = 1;
3278 84
        } elseif (0xC0 === (0xE0 & $in)) {
3279
          // First octet of 2 octet sequence.
3280 75
          $mUcs4 = $in;
3281 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3282 75
          $mState = 1;
3283 75
          $mBytes = 2;
3284 58
        } elseif (0xE0 === (0xF0 & $in)) {
3285
          // First octet of 3 octet sequence.
3286 41
          $mUcs4 = $in;
3287 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3288 41
          $mState = 2;
3289 41
          $mBytes = 3;
3290 30
        } elseif (0xF0 === (0xF8 & $in)) {
3291
          // First octet of 4 octet sequence.
3292 19
          $mUcs4 = $in;
3293 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3294 19
          $mState = 3;
3295 19
          $mBytes = 4;
3296 13
        } elseif (0xF8 === (0xFC & $in)) {
3297
          /* First octet of 5 octet sequence.
3298
          *
3299
          * This is illegal because the encoded codepoint must be either
3300
          * (a) not the shortest form or
3301
          * (b) outside the Unicode range of 0-0x10FFFF.
3302
          * Rather than trying to resynchronize, we will carry on until the end
3303
          * of the sequence and let the later error handling code catch it.
3304
          */
3305 5
          $mUcs4 = $in;
3306 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3307 5
          $mState = 4;
3308 5
          $mBytes = 5;
3309 10
        } elseif (0xFC === (0xFE & $in)) {
3310
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3311 5
          $mUcs4 = $in;
3312 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3313 5
          $mState = 5;
3314 5
          $mBytes = 6;
3315
        } else {
3316
          // Current octet is neither in the US-ASCII range nor a legal first
3317
          // octet of a multi-octet sequence.
3318 103
          return false;
3319
        }
3320
      } else {
3321
        // When mState is non-zero, we expect a continuation of the multi-octet
3322
        // sequence
3323 84
        if (0x80 === (0xC0 & $in)) {
3324
          // Legal continuation.
3325 76
          $shift = ($mState - 1) * 6;
3326 76
          $tmp = $in;
3327 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3328 76
          $mUcs4 |= $tmp;
3329
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3330
          // Unicode code point to be output.
3331 76
          if (0 === --$mState) {
3332
            // Check for illegal sequences and code points.
3333
            //
3334
            // From Unicode 3.1, non-shortest form is illegal
3335
            if (
3336 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3337
                ||
3338 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3339
                ||
3340 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3341
                ||
3342 76
                (4 < $mBytes)
3343
                ||
3344
                // From Unicode 3.2, surrogate characters are illegal.
3345 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3346
                ||
3347
                // Code points outside the Unicode range are illegal.
3348 76
                ($mUcs4 > 0x10FFFF)
3349
            ) {
3350 8
              return false;
3351
            }
3352
            // initialize UTF8 cache
3353 76
            $mState = 0;
3354 76
            $mUcs4 = 0;
3355 76
            $mBytes = 1;
3356
          }
3357
        } else {
3358
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3359
          // Incomplete multi-octet sequence.
3360 36
          return false;
3361
        }
3362
      }
3363
    }
3364
3365 67
    return true;
3366
  }
3367
3368
  /**
   * Decodes a JSON string after passing it through "UTF8::filter()".
   *
   * Thin wrapper around PHP's native json_decode(): every argument after
   * <i>$json</i> is forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The JSON text to decode; it is run through "UTF8::filter()" first.</p>
   * @param bool   $assoc   [optional] <p>Forwarded to json_decode(): when <b>TRUE</b>, objects are returned
   *                        as associative arrays.</p>
   * @param int    $depth   [optional] <p>Forwarded to json_decode(): user specified recursion depth.</p>
   * @param int    $options [optional] <p>Forwarded to json_decode(): bitmask of JSON decode options.</p>
   *
   * @throws \RuntimeException if the support table reports that "ext-json" is not installed
   *
   * @return mixed
   *                Whatever json_decode() produced for the filtered input; per the PHP manual that is
   *                <b>NULL</b> when the text cannot be decoded or is nested deeper than <i>$depth</i>.
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // The input is filtered before the extension check, so that step always runs.
    $filteredJson = self::filter($json);

    // Fill the support table lazily on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $jsonIsMissing = (self::$SUPPORT['json'] === false);
    if ($jsonIsMissing) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3420
3421
  /**
   * Returns the JSON representation of a value after passing it through "UTF8::filter()".
   *
   * Thin wrapper around PHP's native json_encode(): <i>$options</i> and
   * <i>$depth</i> are forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value to encode; it is run through "UTF8::filter()" first.</p>
   * @param int   $options [optional] <p>Forwarded to json_encode(): bitmask of JSON encode constants
   *                       (e.g. <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>).</p>
   * @param int   $depth   [optional] <p>Forwarded to json_encode(): maximum depth, must be greater than zero.</p>
   *
   * @throws \RuntimeException if the support table reports that "ext-json" is not installed
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // The value is filtered before the extension check, so that step always runs.
    $filteredValue = self::filter($value);

    // Fill the support table lazily on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $jsonIsMissing = (self::$SUPPORT['json'] === false);
    if ($jsonIsMissing) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3476
3477
  /**
   * Reports whether the JSON functions are usable in this PHP runtime.
   *
   * The check is done by probing for the global json_decode() function.
   *
   * @return bool
   *              <strong>true</strong> if json_decode() exists, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3487
3488
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset handed to the substr / strtolower helpers.</p>
   * @param bool   $cleanUtf8 [optional] <p>Flag handed to the substr / strtolower helpers
   *                          (remove non UTF-8 chars from the string).</p>
   *
   * @return string The lowercased first character followed by the unchanged remainder.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character is kept as it is.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // Only the leading character is lowercased; a failed substr() becomes '' through the cast.
    $head = self::strtolower(
        (string)self::substr($str, 0, 1, $encoding, $cleanUtf8),
        $encoding,
        $cleanUtf8
    );

    // substr() signals "no remainder" with false, which must not be concatenated.
    return $head . ($tail === false ? '' : $tail);
  }
3512
3513
  /**
   * Alias of "UTF8::lcfirst()": all arguments are passed through unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   *
   * @return string The result of "UTF8::lcfirst()".
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerFirst = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowerFirst;
  }
3528
3529
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split with "UTF8::str_to_words()", each non-empty piece that is
   * not listed in <i>$exceptions</i> is passed through "UTF8::lcfirst()", and the
   * pieces are glued back together without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept exactly as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word
   *                             (forwarded to "UTF8::str_to_words()").</p>
   * @param string   $encoding   [optional] <p>Set the charset (forwarded to "UTF8::lcfirst()").</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string
   *                             (forwarded to "UTF8::lcfirst()").</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a plain truthiness test would also
    // throw away the perfectly valid input "0".
    if ('' === $str) {
      return '';
    }

    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip empty pieces only; the piece "0" is falsy in PHP but must survive.
      // The cast keeps null / false pieces skipped as before
      // (NOTE(review): str_to_words() presumably yields strings only - confirm).
      if ('' === (string)$word) {
        continue;
      }

      // With an empty exception list in_array() is always false, so every
      // word is lowercased - no separate "use exceptions" flag is needed.
      if (!\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3578
3579
  /**
   * Alias of "UTF8::lcfirst()": all arguments are passed through unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   *
   * @return string The result of "UTF8::lcfirst()".
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerFirst = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowerFirst;
  }
3594
3595
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. When left at the default (INF) or given as an
   *                      empty value, a leading run of Unicode separator (\pZ) and "other" (\pC) characters is
   *                      stripped instead. The string "0" counts as a real character list.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a legitimate list of characters to strip
    // (e.g. leading zeros), so it must not select the default pattern.
    $useDefaultPattern = ($chars === INF) || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = "^[\pZ\pC]+";
    } else {
      // Quote for use inside a character class with "/" as the delimiter.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3619
3620
  /**
   * Returns the UTF-8 character with the highest code point in the given data.
   *
   * An array argument is concatenated into one string before its code points are collected.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, or null when no code point was found.
   */
  public static function max($arg)
  {
    $haystack = (\is_array($arg) === true) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);

    if (\count($codepoints) > 0) {
      return self::chr(\max($codepoints));
    }

    return null;
  }
3642
3643
  /**
   * Returns the largest per-character byte size reported by
   * "UTF8::chr_size_list()" for the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int The largest byte length among the characters, or 0 when the size list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return (\count($sizes) > 0) ? (int)\max($sizes) : 0;
  }
3660
3661
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding
   * is switched to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3677
3678
  /**
   * Checks whether mbstring function overloading of the string functions is active.
   *
   * INFO: INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
   *
   * @return bool <strong>true</strong> if the MB_OVERLOAD_STRING bit is set in 'mbstring.func_overload'.
   */
  private static function mbstring_overloaded(): bool
  {
    // Without the constant there is nothing to test the INI value against.
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3695
3696
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * An array argument is concatenated into one string before its code points are collected.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, or null when no code point was found.
   */
  public static function min($arg)
  {
    $haystack = (\is_array($arg) === true) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);

    if (\count($codepoints) > 0) {
      return self::chr(\min($codepoints));
    }

    return null;
  }
3718
3719
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are passed through unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>Forwarded to "UTF8::normalize_encoding()".</p>
   * @param mixed $fallback <p>Forwarded to "UTF8::normalize_encoding()".</p>
   *
   * @return mixed The result of "UTF8::normalize_encoding()".
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3735
3736
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order:
   * 1. empty input (or "0" / "1") returns <i>$fallback</i>,
   * 2. "UTF-8" / "UTF8" return "UTF-8" directly,
   * 3. a per-request static cache is consulted,
   * 4. names found verbatim in the "encodings" data list are returned as they are,
   * 5. otherwise the name is upper-cased and, with everything except letters, digits and
   *    whitespace removed, looked up in the alias table below; when no alias matches,
   *    the upper-cased name is returned.
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    // fast path for the most common input
    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // getData() may hand back false instead of an array; store an empty
      // list in that case so the property keeps its documented null|array type.
      $knownEncodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($knownEncodings) ? $knownEncodings : [];
    }

    if (
        // the property may still hold a non-array if it was filled elsewhere,
        // and in_array() throws a TypeError for a non-array haystack
        \is_array(self::$ENCODINGS)
        &&
        \in_array($encoding, self::$ENCODINGS, true)
    ) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // preg_replace() returns null on error; the cast keeps the array key a string
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache under the caller's original spelling so the next lookup is a direct hit
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3861
3862
  /**
   * Standardize line endings to unix-like: "\r\n" and a lone "\r" both become "\n".
   *
   * @param string $str
   *
   * @return string
   */
  public static function normalize_line_ending(string $str): string
  {
    // strtr() tries the longest key first, so "\r\n" collapses into a single "\n"
    // before a lone "\r" is considered.
    $lineEndings = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return (string)\strtr($str, $lineEndings);
  }
3873
3874
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table that occurs in the string is
   * replaced by the table's value for that key. The key / value lists are
   * built once and kept in static caches for later calls.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // getData() may hand back false instead of an array; store an empty
        // table in that case so the property keeps its documented null|array type.
        $mswordTable = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($mswordTable) ? $mswordTable : [];
      }

      // The property may still hold a non-array if it was filled elsewhere;
      // array_keys() / array_values() throw a TypeError for anything but an array.
      $replacements = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacements);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacements);
    }

    // with empty lists str_replace() simply returns the input unchanged
    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3902
3903
  /**
   * Normalize the whitespace.
   *
   * Every value of "self::$WHITESPACE_TABLE" is replaced by a plain space and, unless
   * requested otherwise, every value of "self::$BIDI_UNI_CODE_CONTROLS_TABLE" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ($str === '') {
      return '';
    }

    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // one cached search list per "keep non-breaking space" variant (0 or 1)
    $cacheKey = $keepNonBreakingSpace ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if (!$keepBidiUnicodeControls) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3945
3946
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per "input + encoding". "IntlChar::ord()" is used when
   * "self::$SUPPORT['intlChar']" is true and it yields a non-zero result; otherwise the
   * code point is decoded by hand from (at most) the first four bytes.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read (e.g. for an empty string).
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache key is always built from the untouched input
    $input = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $CHAR_CACHE[$cacheKey] = $intlCodePoint;
      }
    }

    // manual decoding: 1-based list of the first (max.) four bytes
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      /** @noinspection UnnecessaryCastingInspection */
      $codePoint = (int)((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
      // single byte (or an incomplete sequence): the first byte value is used as it is
      $codePoint = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $codePoint;
  }
4015
4016
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never creates variables in the current scope;
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    // always fallback via symfony polyfill
    if (\mb_parse_str($str, $result) === false) {
      return false;
    }

    // a successful call that produced nothing still counts as a failure
    return !empty($result);
  }
4041
4042
  /**
   * Checks if the "u" modifier is available that enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern matches the empty subject (result: 1) if the modifier is known
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @\preg_match('//u', '');

    return $matchResult === 1;
  }
4054
4055
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point in this order: a string of decimal digits
   * is used as number, a string of hexadecimal digits is converted via "hex_to_int()",
   * everything else is treated as a character and passed to "ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] An empty array is returned if a boundary is empty or resolves to 0.
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // The boundary is cast to string once, so the ctype functions never receive a
    // non-string argument (deprecated since PHP 8.1).
    $toCodePoint = static function ($boundary): int {
      $boundary = (string)$boundary;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4111
4112
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities, so that the
    // entity decoding in the loop below can resolve them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str) === 1) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // decode until nothing changes anymore (or only once, if multi-decoding is disabled)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
    } while ($multi_decode && $previous !== $str);

    return $str;
  }
4160
4161
  /**
   * Filter a list of strings and re-index the result.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values which are empty after "trim()".</p>
   * @param int   $removeShortValues <p>Drop values whose length is less than or equal to this value;
   *                                 null disables the length check.</p>
   *
   * @return array The remaining values, in the original order, with consecutive keys.
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $keep = static function ($str) use ($removeEmptyValues, $removeShortValues) {
      // the length check runs first, exactly like the trim check only runs for survivors
      if ($removeShortValues !== null && self::strlen($str) <= $removeShortValues) {
        return false;
      }

      return !($removeEmptyValues && \trim($str) === '');
    };

    return \array_values(\array_filter($strings, $keep));
  }
4195
4196
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is wrapped into the delimiter and always gets the "u" (UTF-8) modifier.
   * Neither the pattern nor the options are escaped or validated here.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern.</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to "ms"
    // NOTE(review): presumably because "r" is not a PCRE modifier -- confirm
    $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

    // fallback
    $delimiter = $delimiter ?: '/';

    return (string)\preg_replace(
        $delimiter . $pattern . $delimiter . $modifiers,
        $replacement,
        $str
    );
  }
4226
4227
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4242
4243
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$BOM" (BOM => byte length) is checked in turn, so that
   * several BOMs at the beginning of the string are removed as well.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bom => $bomLength) {
      // only a BOM at the very beginning of the string counts
      if (self::strpos_in_byte($str, $bom, 0) !== 0) {
        continue;
      }

      $rest = self::substr_in_byte($str, $bomLength, $remainingBytes);
      if ($rest === false) {
        return '';
      }

      $remainingBytes -= $bomLength;
      $str = (string)$rest;
    }

    return $str;
  }
4272
4273
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated needles (e.g. "   ") is collapsed into one
   * single needle (e.g. " ").
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($what) === false) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($what as $item) {
      $item = (string)$item;

      // an empty needle can not be duplicated
      if ($item === '') {
        continue;
      }

      // A callback is used on purpose: a plain replacement string would be scanned for
      // back-references, so needles like "$0" or "\1" were not inserted literally.
      $str = (string)\preg_replace_callback(
          '/(?:' . \preg_quote($item, '/') . ')+/',
          static function () use ($item) {
            return $item;
          },
          $str
      );
    }

    return $str;
  }
4296
4297
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which
   *                              should not be stripped. Default: '' (strip every tag)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4311
4312
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as one single break, and only real "<br>" tags (with optional
   * attributes or a self-closing slash) are matched -- not other tags which merely
   * start with "br".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // - no stray "/" in front of "\r\n": it removed a literal slash before a line break
    //   and prevented "\r\n" from being handled as one unit
    // - "\b" + "[^>]*" instead of ".*": stops at the end of the tag and skips e.g. "<brand>"
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
4324
4325
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form (e.g. "%00") of these characters.</p>
   * @param string $replacement <p>The string which is inserted instead of the removed characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), so both spellings must match
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left: a removal can glue a new sequence together (e.g. "%%0000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4358
4359
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // static analysis reports that "strlen()" may return false; the cast keeps the
    // offset an integer under "strict_types"
    $prefixLength = (int)self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4382
4383
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // "strlen()" may return false, hence the integer casts
    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // the lengths were measured in $encoding, so the cut must use the same encoding
    // (same as in "remove_left()")
    return (string)self::substr($str, 0, $keepLength, $encoding);
  }
4405
4406
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to "str_replace()" (case-sensitive) or "str_ireplace()" (case-insensitive).
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4424
4425
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * Delegates to "str_replace()" (case-sensitive) or "str_ireplace()" (case-insensitive).
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    if (!$caseSensitive) {
      return self::str_ireplace($search, $replacement, $str);
    }

    return self::str_replace($search, $replacement, $str);
  }
4443
4444
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8) {
      // "mb_substitute_character()" expects the keyword "none" to drop invalid chars
      $substituteChar = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      //
      // the substitute character is a global setting, so it is saved here and restored
      // right after the conversion
      $previousSubstituteChar = \mb_substitute_character();
      \mb_substitute_character($substituteChar);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstituteChar);

      $str = \is_string($converted) ? $converted : '';
    }

    $search = [
        "\xEF\xBF\xBD",
        '�',
    ];

    $replace = [
        $replacementChar,
        $replacementChar,
    ];

    return str_replace($search, $replace, $str);
  }
4500
4501
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars, every trailing char of the Unicode categories "Z" (separators) and
   * "C" (control & other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped. INF, null, false, '' and []
   *                      select the default; everything else is cast to string, so "0" is a
   *                      valid char list.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // A plain "!$chars" check would also swallow "0" / 0, so zeros could never be trimmed;
    // only the real "not given" values select the default.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === [];

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = '[\pZ\pC]+$';
    } else {
      // the cast keeps numeric char lists working under "strict_types"
      $pattern = '[' . \preg_quote((string)$chars, '/') . ']+$';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4525
4526
  /**
   * Build a regex fragment that matches any of the chars of $s.
   *
   * The chars are collected into one character class which starts with the content of
   * $class. A "-" is moved to the front of the class, elements shorter than three bytes
   * are escaped via "preg_quote()", longer single chars are appended as they are and
   * every other element becomes its own alternative next to the class.
   * The result is memoized per "$s + $class".
   *
   * @param string $s     <p>The chars to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string The character class, or a non-capturing group of alternatives.
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the content of the character class, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ($char === '-') {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        $parts[0] .= \preg_quote($char, '/');
      } elseif (self::strlen($char) === 1) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = \count($parts) === 1
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4574
4575
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of "self::$SUPPORT" is printed as "key - value" line inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $output .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4590
4591
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are passed on as they are, everything else is normalized first
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \sprintf('&#%d;', self::ord($char, $encoding));
  }
4620
4621
  /**
   * Replace every run of $tabLength spaces by one tab.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces which make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4631
4632
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via regex; otherwise the bytes are walked
   * by hand, and bytes which do not form a complete, well-formed 1-4 byte sequence are skipped.
   *
   * @param string|int $str       <p>The string to split into array.</p>
   * @param int        $length    [optional] <p>Max character length of each array element.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    $str = (string)$str;

    if ($str === '') {
      return [];
    }

    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      for ($i = 0; $i < $byteCount; ++$i) {
        $leadByte = $str[$i];

        // plain ASCII byte
        if (($leadByte & "\x80") === "\x00") {
          $chars[] = $leadByte;

          continue;
        }

        // the lead byte announces the width of the sequence
        if (($leadByte & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($leadByte & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($leadByte & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          // stray continuation byte or invalid lead byte
          continue;
        }

        // truncated sequence at the end of the string
        if (!isset($str[$i + $width - 1])) {
          continue;
        }

        // every following byte must be a continuation byte (10xxxxxx)
        $char = $leadByte;
        $isWellFormed = true;
        for ($offset = 1; $offset < $width; ++$offset) {
          if (($str[$i + $offset] & "\xC0") !== "\x80") {
            $isWellFormed = false;

            break;
          }

          $char .= $str[$i + $offset];
        }

        if ($isWellFormed) {
          $chars[] = $char;

          // jump over the continuation bytes which were just consumed
          $i += $width - 1;
        }
      }
    }

    if ($length > 1) {
      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4752
4753
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // drops the separators and upper-cases the char right behind them
    $upperAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };

    // upper-cases a run of digits together with the char right behind it
    $upperAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    $str = self::lcfirst(self::trim($str), $encoding);
    $str = (string)\preg_replace('/^[-_]+/', '', $str);
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);
  }
4790
4791
  /**
   * Capitalizes the first letter of each word of a name, except for words
   * (or prefixes) which shouldn't be capitalized.
   *
   * Whitespace is collapsed first; afterwards the parts separated by spaces
   * and then the parts separated by dashes are processed.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);

    // first pass: space separated words, second pass: hyphenated parts
    return self::str_capitalize_name_helper(
        self::str_capitalize_name_helper($collapsed, ' '),
        '-'
    );
  }
4808
4809
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names at $delimiter and upper-cases the first character of every
   * part, unless the part is one of the known particles (e.g. "von", "de")
   * or starts with one of the known prefixes (e.g. "mc", "d'"). When the
   * delimiter is "-", a part that merely starts with a particle is left
   * untouched as well.
   *
   * @param string $names
   * @param string $delimiter
   * @param string $encoding
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // init
    $namesArray = \explode($delimiter, $names);

    // explode() returns false for an empty delimiter before PHP 8
    if ($namesArray === false) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // for hyphenated parts the stand-alone particles count as prefixes, too
    $beginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $beginnings = \array_merge($specialCases['names'], $beginnings);
    }

    // iterate by key instead of by reference, so no dangling reference is left behind
    foreach ($namesArray as $key => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $keepAsIs = false;
      foreach ($beginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $keepAsIs = true;

          // one hit is enough, no need to scan the remaining prefixes
          break;
        }
      }

      if ($keepAsIs === false) {
        $namesArray[$key] = self::str_upper_first($name);
      }
    }

    return \implode($delimiter, $namesArray);
  }
4897
4898
  /**
   * Checks whether $haystack contains $needle. The comparison is
   * case-sensitive by default and becomes case-insensitive when
   * $caseSensitive is false. An empty haystack or needle never matches.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                              A non-boolean value is used as the encoding (legacy call style).</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4927
4928
  /**
   * Checks whether $haystack contains every entry of $needles. The
   * comparison is case-sensitive by default and becomes case-insensitive
   * when $caseSensitive is false. An empty haystack or an empty needle list
   * yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                              A non-boolean value is used as the encoding (legacy call style).</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);

      // the first missing needle settles the answer
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
4963
4964
  /**
   * Checks whether $haystack contains at least one entry of $needles. The
   * comparison is case-sensitive by default and becomes case-insensitive
   * when $caseSensitive is false. An empty needle list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    $found = false;

    // an empty list never enters the loop, so the result stays false
    foreach ($needles as $needle) {
      if (self::str_contains($haystack, $needle, $caseSensitive, $encoding)) {
        $found = true;

        break;
      }
    }

    return $found;
  }
4991
4992
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * This is str_delimit() with "-" as the delimiter: dashes are inserted
   * before uppercase characters (with the exception of the first character
   * of the string), and in place of spaces as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5006
5007
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * Uppercase characters are detected via the Unicode property "Lu", so
   * letters outside of A-Z are handled as well. The delimiter is always
   * inserted literally, even if it contains "$" or "\".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every uppercase letter inside of a word with a dash (\p{Lu} also covers non-ASCII letters)
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    // lower-case before the delimiter is inserted, so the delimiter keeps its case
    $str = self::strtolower($str, $encoding);

    // escape "\" and "$", otherwise e.g. "$1" in the delimiter would be read as a back-reference
    $literalDelimiter = \addcslashes($delimiter, '\\$');

    return (string)\preg_replace('/[-_\s]+/u', $literalDelimiter, $str);
  }
5029
5030
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32, anything
   * else binary gives false), ASCII, UTF-8, "mb_detect_encoding()" with a
   * fixed detection order and finally an "iconv()" round-trip over the
   * encodings loaded via getData('encodings').
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // run each detector only once and re-use the result (1 => LE, 2 => BE)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    // always fallback via symfony polyfill
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() may return false: fall back to an empty list, so that
      // the property always holds an array and the loop below is safe
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      // the string survives the round-trip unchanged => it is valid in this encoding
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5151
5152
  /**
   * Check if the string ends with the given substring (byte-wise,
   * case-sensitive). An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    $needleLength = \strlen($needle);

    if ($needleLength === 0 || $haystack === '') {
      return false;
    }

    // the last bytes of the haystack, as many as the needle is long
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
5168
5169
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    $matched = false;

    // an empty list never enters the loop, so the result stays false
    foreach ($substrings as $substring) {
      if (self::str_ends_with($str, $substring)) {
        $matched = true;

        break;
      }
    }

    return $matched;
  }
5193
5194
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring) ? $str : $substring . $str;
  }
5211
5212
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring) ? $str : $str . $substring;
  }
5228
5229
  /**
   * Makes an identifier-like string readable: "_id" is removed, the
   * remaining underscores are replaced with spaces, and the trimmed result
   * is passed through ucfirst().
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // search => replacement, "_id" has to be handled before the single "_"
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $readable = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    return self::ucfirst(self::trim($readable));
  }
5253
5254
  /**
   * Check if the string ends with the given substring, case insensitive.
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the tail is cut by the byte-length of the needle and then compared via strcasecmp()
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5274
5275
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    $matched = false;

    // an empty list never enters the loop, so the result stays false
    foreach ($substrings as $substring) {
      if (self::str_iends_with($str, $substring)) {
        $matched = true;

        break;
      }
    }

    return $matched;
  }
5299
5300
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Thin wrapper around stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5322
5323
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string. Thin wrapper around strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5346
5347
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Thin wrapper around strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5369
5370
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string. Thin wrapper around strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5393
5394
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged if $index lies behind its end or if
   * its length can not be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // strlen() may return false, which must not be handed on to substr() as a length
    if ($len === false || $index > $len) {
      return $str;
    }

    // substr() may return false as well, the casts keep both parts strings
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5417
5418
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search values are turned into quoted, case-insensitive UTF-8
   * patterns; the replacement values are inserted literally (a "$" or "\"
   * in them has no special meaning).
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search value never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value, or an array of values which
   *                       are paired with the search values in order.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern which can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$", otherwise e.g. "$0" or "\1" would be expanded as back-references
    $toLiteral = static function ($value) {
      return \addcslashes((string)$value, '\\$');
    };
    $replacement = \is_array($replace) ? \array_map($toLiteral, $replace) : $toLiteral($replace);

    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5461
5462
  /**
   * Check if the string starts with the given substring, case insensitive.
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a hit at position 0 means the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
5482
5483
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || empty($substrings)) {
      return false;
    }

    $matched = false;
    foreach ($substrings as $substring) {
      if (self::str_istarts_with($str, $substring)) {
        $matched = true;

        break;
      }
    }

    return $matched;
  }
5511
5512
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned if $str or $separator is empty or if the
   * separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // search with the same encoding that is used for cutting, so the offset fits
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5543
5544
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned if $str or $separator is empty or if the
   * separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // search with the same encoding that is used for cutting, so the offset fits
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5575
5576
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned if $str or $separator is empty or if the
   * separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // search with the same encoding that is used for cutting, so the offset fits
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5602
5603
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned if $str or $separator is empty or if the
   * separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // search with the same encoding that is used for cutting, so the offset fits
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5629
5630
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to stristr(); an empty string is returned if $str or $needle
   * is empty or if stristr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5662
5663
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * Delegates to strrichr(); an empty string is returned if $str or $needle
   * is empty or if strrichr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5690
5691
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The trailing characters, or an empty string if "$n" is not positive
   *                or self::substr() returned false.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // a negative start offset counts from the end; a null length reads up to the end
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5710
5711
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than "$length", it is cut so that the kept part
   * plus "$strAddOn" is "$length" characters long. If "$strAddOn" alone is
   * already as long as (or longer than) "$length", only "$strAddOn" is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>An empty string if "$str" is empty or "$length" is not positive, the unchanged
   *                string if it already fits, otherwise the truncated string followed by "$strAddOn".</p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if ((int)self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the input, and never let the
    // kept length go negative: a negative length would make substr() drop
    // characters from the end instead of truncating to the limit.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    // substr() may return false; cast so the concatenation always works on strings
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5737
5738
  /**
   * Limit the number of characters in a string, cutting at a word boundary (a single space) where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>An empty string if "$str" is empty or "$length" is not positive, the unchanged
   *                string if it already fits, otherwise the shortened string followed by "$strAddOn".</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // the character at the limit is a space: cut right before it
    $endsOnSpace = (self::substr($str, $length - 1, 1, $encoding) === ' ');
    if ($endsOnSpace) {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // otherwise cut at the limit and drop the (possibly partial) last word
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // no space inside the limit: fall back to a hard cut
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
5779
5780
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * Both strings are compared character by character (via self::substr())
   * from the start until the first difference.
   *
   * @param string $str      <p>The input sting.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared prefix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = self::substr($str, $i, 1, $encoding);

      // Strict comparison: with "==" a failed extraction (false) would be
      // considered equal to "0" or "". A failed extraction ends the prefix.
      if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
5806
5807
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * Uses the dynamic programming approach described at
   * http://en.wikipedia.org/wiki/Longest_common_substring_problem
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring (taken from "$str"), or an empty string
   *                if either input is empty or nothing is shared.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per table cell.
    $otherChars = [];
    for ($j = 0; $j < $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // (exclusive) end position of that match inside $str

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // every cell starts at 0, so only matching cells need to be written.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // invariant for the whole inner loop
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        // strict comparison: a failed extraction (false) must not match "0" or ""
        if ($strChar !== $otherChars[$j - 1]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // ">" (not ">=") keeps the first of several equally long matches
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
5858
5859
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * Both strings are compared character by character (via self::substr()
   * with negative offsets) from the end until the first difference.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared suffix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      $char = self::substr($str, -$i, 1, $encoding);

      // Strict comparison: with "==" a failed extraction (false) would be
      // considered equal to "0" or "". A failed extraction ends the suffix.
      if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
5885
5886
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match, 0 on no match and false on error
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
5903
  /**
5904
   * Returns whether or not a character exists at an index. Offsets may be
5905
   * negative to count from the last character in the string. Implements
5906
   * part of the ArrayAccess interface.
5907
   *
5908
   * @param string $str      <p>The input string.</p>
5909
   * @param int    $offset   <p>The index to check.</p>
5910
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5911
   *
5912
   *
5913
   * @return bool Whether or not the index exists.
5914
   */
5915 6
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
5916
  {
5917
    // init
5918 6
    $length = self::strlen($str, $encoding);
5919
5920 6
    if ($offset >= 0) {
5921 3
      return ($length > $offset);
5922
    }
5923
5924 3
    return ($length >= \abs($offset));
5925
  }
5926
5927
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // The bounds check has to use the same encoding as the character lookup
    // below, otherwise the length and the index refer to different units.
    $length = (int)self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
5956
5957
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>;
   *                           the strings 'left', 'right' and 'both' are accepted as well.
   *                           </p>
   * @param string $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The padded string. An empty input string is returned as an empty string, and the
   *                input is returned unchanged if "$pad_length" is not positive, is smaller than the
   *                length of "$str", or if "$pad_string" is empty.</p>
   *
   * @throws \InvalidArgumentException If "$pad_type" is neither an integer nor one of 'left', 'right', 'both'.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // map the string aliases onto the native STR_PAD_* constants
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = (int)self::strlen($str, $encoding);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // nothing to pad with; also guards the divisions by $ps_length below
      return $str;
    }

    // number of characters that have to be added in total
    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the larger one
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
6028
6029
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. Alias for pad() with a $padType of 'both'.
   *
   * The missing length is split between both sides; if it is odd, the right
   * side receives the extra character.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6046
6047
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Alias for pad() with a $padType of 'left'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure the input in the requested encoding, like str_pad_both() does
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6062
6063
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Alias for pad() with a $padType of 'right'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure the input in the requested encoding, like str_pad_both() does
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6078
6079
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6103
6104
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // all arguments (including the by-reference counter) are forwarded unchanged
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6137
6138
  /**
   * Replaces "$search" at the beginning of the string with "$replacement".
   *
   * The search term is quoted and anchored with "^", then handed to
   * self::regex_replace(); backslashes in the replacement are doubled first.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = '^' . \preg_quote($search, '/');

    // double every backslash so it is not read as an escape by the regex replacement
    $escapedReplacement = \str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6155
6156
  /**
   * Replaces "$search" at the ending of the string with "$replacement".
   *
   * The search term is quoted and anchored with "$", then handed to
   * self::regex_replace(); backslashes in the replacement are doubled first.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = \preg_quote($search, '/') . '$';

    // double every backslash so it is not read as an escape by the regex replacement
    $escapedReplacement = \str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6173
6174
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The string to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string being searched and replaced on.</p>
   *
   * @return string <p>The subject with the first match replaced, or the unchanged subject
   *                if "$search" was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6193
6194
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The string to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string being searched and replaced on.</p>
   *
   * @return string <p>The subject with the last match replaced, or the unchanged subject
   *                if "$search" was not found.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $position = self::strrpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6213
6214
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    $length = (int)self::strlen($str);

    // range(0, -1) would yield [0, -1] instead of an empty list, so an empty
    // string has to be handled before the index list is built
    if ($length <= 0) {
      return '';
    }

    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // rebuild the string by picking the characters in the shuffled order
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6236
6237
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * NOTE: "$start" is forwarded to self::substr() as given; the length
   * computed for a given "$end" is "$end - $start" (plus the string length
   * for a negative "$end").
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // The string length is measured in the same encoding that is used for
    // the extraction below, so both refer to the same character units.
    if ($end === null) {
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // the end lies at or before the start: nothing to extract
      return '';
    } elseif ($end < 0) {
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6266
6267
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become underscores, every uppercase ASCII letter
   * is lowercased and prefixed with an underscore, every digit is surrounded
   * by underscores, and runs of underscores are collapsed and trimmed.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // digits and uppercase letters only; a "|" inside a character class
        // is a literal pipe, not an alternation, so it must not appear here
        '/([\dA-Z])/u',
        static function ($matches) use ($encoding) {
          $match = $matches[1];

          // a character that survives the int round-trip is a decimal digit
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6314
6315
  /**
   * Sort all characters according to code points.
   *
   * The string is converted with self::codepoints(), sorted, and converted
   * back with self::string().
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicate values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6340
6341
  /**
   * Split a string into an array.
   *
   * The string is split with the GRAPHEME_CLUSTER_RX pattern and the
   * resulting pieces are grouped into chunks of "$len" pieces each. If an
   * array is passed, every entry is split on its own and the result is an
   * array of those results.
   *
   * @param string|string[] $str <p>The input string, or an array of strings.</p>
   * @param int             $len <p>Number of pieces per chunk. Default: 1</p>
   *
   * @return string[] <p>The chunks, or an empty array if "$len" is not positive or "$str" is empty.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if ($len <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $pieces = $matches[0];

    if ($len === 1) {
      return $pieces;
    }

    // group "$len" consecutive pieces and glue each group back together
    $chunks = [];
    foreach (\array_chunk($pieces, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
6389
6390
  /**
   * Splits the string at every occurrence of "$pattern", returning an array
   * of strings. An optional integer $limit will truncate the results.
   *
   * NOTE: "$pattern" is passed through preg_quote() before it is used, so it
   * is matched literally.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>The separator with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] <p>An array of strings; empty if "$limit" is 0 or the split failed, and
   *                  "[$str]" if "$pattern" is empty.</p>
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // this->split errors when supplied an empty pattern in < PHP 5.4.13
    // and current versions of HHVM (3.8 and below)
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the remaining unsplit string into the last element
    // when a limit is given, so ask for one element more and drop it below
    $limit = ($limit > 0) ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
    if ($parts === false) {
      return [];
    }

    if ($limit > 0 && \count($parts) === $limit) {
      \array_pop($parts);
    }

    return $parts;
  }
6433
6434
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is byte-wise and case-sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>True if "$haystack" begins with "$needle"; false otherwise,
   *              and also if either argument is an empty string.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // compare only the leading bytes that the needle covers
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6454
6455
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the substrings; always false
   *              for an empty "$str" or an empty list.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6483
6484
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's encoding, so the offset is
    // expressed in the same unit as the strlen()/substr() calls below.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6515
6516
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's encoding, so the offset is
    // expressed in the same unit as the strlen()/substr() calls below.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6547
6548
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's encoding, so the offset is
    // expressed in the same unit as the substr() call below.
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6579
6580
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, or when the
   * separator does not occur in $str.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // Locate the separator with the caller's encoding, so the offset is
    // expressed in the same unit as the substr() call below.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6611
6612
  /**
   * Returns the part of the string after (or, with "$beforeNeedle", before) the first
   * occurrence of "$needle", as produced by UTF8::strstr().
   *
   * An empty string is returned when $str or $needle is empty, or when
   * UTF8::strstr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6644
6645
  /**
   * Returns the part of the string after (or, with "$beforeNeedle", before) the last
   * occurrence of "$needle", as produced by UTF8::strrchr().
   *
   * An empty string is returned when $str or $needle is empty, or when
   * UTF8::strrchr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6672
6673
  /**
   * Wraps $str in the given substring.
   *
   * @param string $str       <p>The string to wrap.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6685
6686
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * A "word" is every run of non-whitespace characters. Words listed in
   * $ignore are compared strictly (case-sensitive) and returned untouched;
   * all other words are lower-cased first and then get an upper-case first
   * character.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Use the same encoding for both case conversions.
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6715
6716
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * The conversion runs in several regex passes:
   * 1. main pass: keeps URLs / e-mails / file paths, lower-cases "small words",
   *    capitalizes words without internal capitals, keeps other words (iPhone);
   * 2. small words at the start of the title / sub-sentence / sub-phrase are capitalized;
   * 3. small words at the end of the title / sub-phrase are capitalized;
   * 4. small words that start or end a two-part hyphenated compound are capitalized.
   *
   * INFO: the entries of $ignore are inserted verbatim into the regular
   * expressions (joined by "|"), next to the built-in small-word patterns.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // A title without any lower-case character (e.g. "ALL CAPS") is lower-cased
    // first, so that the passes below can re-capitalize it; honour $encoding here
    // like every other case conversion in this method.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in this pattern tests for the ellipsis character
    // although its inline comment talks about a hyphen - confirm which one is intended.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6860
6861
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2 without leading zeros ("a" => "1100001"). An empty string, or a string
   * consisting only of NUL bytes, yields "0".
   *
   * INFO: the conversion is done byte by byte, because "base_convert()" works
   *       with floats internally and silently loses precision on longer inputs.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    $bits = '';
    $byteCount = \strlen($str);

    for ($i = 0; $i < $byteCount; ++$i) {
      // every byte contributes exactly 8 bits
      $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
    }

    // same shape as a plain number conversion: no leading zeros, but never empty
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
6874
6875
  /**
   * Split a string into an array of lines.
   *
   * Every single line break ("\r\n", "\n" or "\r") separates two lines, so
   * blank lines are kept as empty strings unless $removeEmptyValues is set.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // "\r\n" must come first so that it counts as ONE break; matching
    // "[\r\n]{1,2}" instead would also swallow "\n\n" and drop blank lines.
    // The pattern is pure ASCII, hence no "u" modifier: invalid UTF-8 bytes
    // inside a line must not make the whole split fail.
    $lines = \preg_split('/\r\n|\n|\r/', $str);

    if ($lines === false) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
6910
6911
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, i.e. the result
   * alternates between the text between words (even indexes) and the
   * words themselves (odd indexes), unless values are filtered out via
   * $removeEmptyValues / $removeShortValues.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // nothing to filter => hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
6955
6956
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6971
6972
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * If the substring itself is longer than $length it cannot fit and is
   * left out; a $length of zero or less always results in an empty string.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str || $length <= 0) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    $substringLength = (int)self::strlen($substring, $encoding);
    if ($substringLength > $length) {
      // The substring cannot fit: a negative length would make substr() cut
      // from the end of the string and exceed the desired length.
      return (string)self::substr($str, 0, $length, $encoding);
    }

    // Need to further trim the string so we can append the substring
    $truncated = (string)self::substr($str, 0, $length - $substringLength, $encoding);

    return $truncated . $substring;
  }
7005
7006
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * If the substring itself is longer than $length it cannot fit and is
   * left out; a $length of zero or less always results in an empty string.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length <= 0) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // need to further trim the string so we can append the substring
    $substringLength = (int)self::strlen($substring, $encoding);
    if ($substringLength > $length) {
      // The substring cannot fit: drop it instead of running into a negative
      // length, which would make substr() cut from the end of the string.
      $substring = '';
      $substringLength = 0;
    }
    $length -= $substringLength;

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // if the last word was truncated
    // (the cut is clean only if the character right after it is a space)
    $strPosSpace = self::strpos($str, ' ', \max(0, $length - 1), $encoding);
    if ($strPosSpace !== $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      // No space inside the cut but one later on => the first word alone is
      // too long, nothing of it is kept ("(int)false" is 0).
      if ($lastPos !== false || $strPosSpace !== false) {
        $truncated = (string)self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    return $truncated . $substring;
  }
7049
7050
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * INFO: shortcut for UTF8::str_delimit() with "_" as delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7064
7065
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * INFO: UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7079
7080
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8);

    return $upperFirst;
  }
7095
7096
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split via UTF8::str_to_words(); in that result the words
   * sit on the odd indexes, the text between them on the even indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    // every format other than 1 and 2 only counts the words
    if ($format !== 1 && $format !== 2) {
      return (int)(($partCount - 1) / 2);
    }

    $words = [];

    if ($format === 1) {
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: key each word by its offset in the string, starting
    // behind the leading non-word part
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
7139
7140
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(): both strings are passed
   *       through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
7157
7158
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
7175
7176
  /**
   * Case-sensitive string comparison.
   *
   * Byte-identical strings are equal right away; otherwise both strings are
   * normalized to NFD before they are compared byte-wise. A string that cannot
   * be normalized (e.g. invalid UTF-8) is compared as it is.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "Normalizer::normalize()" returns false on failure, and passing that
    // on to "\strcmp()" is a TypeError under strict types.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7195
7196
  /**
   * Find length of initial segment not matching mask.
   *
   * If $offset and/or $length are given, only that part of the string
   * (cut out via UTF8::substr()) is inspected.
   *
   * @param string $str
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   *                  The length of the segment in front of the first character from $charList
   *                  (the length of the whole inspected string, if none is found),<br>
   *                  or <strong>null</strong> if $charList or the inspected string is empty
   *                  or if a length could not be determined.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // Use a dedicated match buffer instead of re-using the "int" parameter $length.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      $segmentLength = self::strlen($matches[1]);
    } else {
      $segmentLength = self::strlen($str);
    }

    // keep the documented "int|null" contract, even if the length lookup fails
    return $segmentLength === false ? null : $segmentLength;
  }
7230
7231
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
7248
7249
  /**
   * Create a UTF-8 string from code points.
   *
   * Every array value is converted via UTF8::chr() and the results are
   * concatenated in array order.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
7271
7272
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is tested against every BOM that is registered as key in self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the keys (the BOM byte sequences) matter here, not their byte length
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7291
7292
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string
   *                                (via UTF8::clean()) before the tags are stripped.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    $input = ($cleanUtf8 === true ? self::clean($str) : $str);

    return \strip_tags($input, $allowable_tags);
  }
7324
7325
  /**
7326
   * Strip all whitespace characters. This includes tabs and newline
7327
   * characters, as well as multibyte whitespace such as the thin space
7328
   * and ideographic space.
7329
   *
7330
   * @param string $str
7331
   *
7332
   * @return string
7333
   */
7334 36
  public static function strip_whitespace(string $str): string
  {
    // Short-circuit: an empty string contains no whitespace.
    if ($str === '') {
      return '';
    }

    // Remove every run of whitespace; the pattern is evaluated in UTF-8 mode.
    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() may not return a string on failure, hence the cast.
    return (string)$stripped;
  }
7342
7343
  /**
7344
   * Finds position of first occurrence of a string within another, case insensitive.
7345
   *
7346
   * @link http://php.net/manual/en/function.mb-stripos.php
7347
   *
7348
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7349
   * @param string $needle    <p>The string to find in haystack.</p>
7350
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
7351
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7352
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7353
   *
7354
   * @return int|false
7355
   *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
7356
   *                   haystack string,<br> or <strong>false</strong> if needle is not found.
7357
   */
7358 75
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters precede $needle inside $haystack
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // The two most common encodings are accepted as-is, everything else gets normalized.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 1) mbstring -- a "not found" result still falls through to the next strategies
    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // 2) intl -- "grapheme_stripos()" handles neither other encodings nor a negative offset
    $intlUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($intlUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // 3) pure ASCII input can be handled by the native byte-based function
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    // 4) symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    // 5) vanilla php: upper-case both operands, then do a case-sensitive search
    $upperHaystack = self::strtoupper($haystack, $encoding, false, null, true);
    $upperNeedle = self::strtoupper($needle, $encoding, false, null, true);

    return self::strpos($upperHaystack, $upperNeedle, $offset, $encoding);
  }
7416
7417
  /**
7418
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
7419
   *
7420
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
7421
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
7422
   * @param bool   $before_needle  [optional] <p>
7423
   *                               If <b>TRUE</b>, it returns the part of the
7424
   *                               haystack before the first occurrence of the needle (excluding the needle).
7425
   *                               </p>
7426
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
7427
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7428
   *
7429
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
7430
   */
7431 19
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" check would also treat the valid needle "0" as empty.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the
    // first case-insensitive occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // self::strlen() may return false, but self::substr() expects an integer offset
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7495
7496
  /**
7497
   * Get the string length, not the byte-length!
7498
   *
7499
   * @link     http://php.net/manual/en/function.mb-strlen.php
7500
   *
7501
   * @param string $str       <p>The string being checked for length.</p>
7502
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7503
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7504
   *
7505
   * @return int|false
7506
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
7507
   *             (One multi-byte character counted as +1).
7508
   *             <br>
7509
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
7510
   */
7511 402
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // The empty string has length zero in every encoding.
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // For these encodings the byte count is returned directly.
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // Neither extension is available for a non-UTF-8 encoding: warn, then keep trying.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Non-UTF-8 encoding with iconv but without mbstring.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === true
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // iconv
    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // intl -- "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the characters matched by a UTF-8 aware regex
    \preg_match_all('/./us', $str, $parts);

    $length = \count($parts[0]);

    // No match on a non-empty string is reported as a failure.
    if ($length === 0 && isset($str[0])) {
      return false;
    }

    return $length;
  }
7606
7607
  /**
7608
   * Get string length in byte.
7609
   *
7610
   * @param string $str
7611
   *
7612
   * @return int
7613
   */
7614 194
  public static function strlen_in_byte($str): int
  {
    // The parameter is untyped, so normalize it to a string first.
    $bytes = (string)$str;

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-bit encoding;
    // otherwise the native strlen() is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($bytes, 'CP850')
        : \strlen($bytes);
  }
7630
7631
  /**
7632
   * Case insensitive string comparisons using a "natural order" algorithm.
7633
   *
7634
   * INFO: natural order version of UTF8::strcasecmp()
7635
   *
7636
   * @param string $str1 <p>The first string.</p>
7637
   * @param string $str2 <p>The second string.</p>
7638
   *
7639
   * @return int
7640
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
7641
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
7642
   *             <strong>0</strong> if they are equal
7643
   */
7644 2
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands, then delegate to the case-sensitive natural comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
7648
7649
  /**
7650
   * String comparisons using a "natural order" algorithm
7651
   *
7652
   * INFO: natural order version of UTF8::strcmp()
7653
   *
7654
   * @link  http://php.net/manual/en/function.strnatcmp.php
7655
   *
7656
   * @param string $str1 <p>The first string.</p>
7657
   * @param string $str2 <p>The second string.</p>
7658
   *
7659
   * @return int
7660
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
7661
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
7662
   *             <strong>0</strong> if they are equal
7663
   */
7664 4
  public static function strnatcmp(string $str1, string $str2): int
  {
    // Identical strings compare equal without any folding.
    if ($str1 === $str2) {
      return 0;
    }

    // Otherwise fold both operands and let the native natural-order comparison decide.
    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7668
7669
  /**
7670
   * Case-insensitive string comparison of the first n characters.
7671
   *
7672
   * @link  http://php.net/manual/en/function.strncasecmp.php
7673
   *
7674
   * @param string $str1 <p>The first string.</p>
7675
   * @param string $str2 <p>The second string.</p>
7676
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
7677
   *
7678
   * @return int
7679
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
7680
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
7681
   *             <strong>0</strong> if they are equal
7682
   */
7683 2
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // Case-fold both operands, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
7687
7688
  /**
7689
   * String comparison of the first n characters.
7690
   *
7691
   * @link  http://php.net/manual/en/function.strncmp.php
7692
   *
7693
   * @param string $str1 <p>The first string.</p>
7694
   * @param string $str2 <p>The second string.</p>
7695
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
7696
   *
7697
   * @return int
7698
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
7699
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
7700
   *             <strong>0</strong> if they are equal
7701
   */
7702 4
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // Cut both operands down to their first $len characters;
    // the cast turns a failed substr() into an empty string.
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
7709
7710
  /**
7711
   * Search a string for any of a set of characters.
7712
   *
7713
   * @link  http://php.net/manual/en/function.strpbrk.php
7714
   *
7715
   * @param string $haystack  <p>The string where char_list is looked for.</p>
7716
   * @param string $char_list <p>This parameter is case sensitive.</p>
7717
   *
7718
   * @return string|false String starting from the character found, or false if it is not found.
7719
   */
7720 2
  public static function strpbrk(string $haystack, string $char_list)
  {
    // Nothing to search in, or nothing to search for.
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Build a regex from the character list (via rxClass) and find the first hit.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // Return the haystack from the byte position of the matched character onwards.
    $bytePosition = (int)\strpos($haystack, $m[0]);

    return \substr($haystack, $bytePosition);
  }
7732
7733
  /**
7734
   * Find position of first occurrence of string in a string.
7735
   *
7736
   * @link http://php.net/manual/en/function.mb-strpos.php
7737
   *
7738
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7739
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
7740
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
7741
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
7742
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7743
   *
7744
   * @return int|false
7745
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
7746
   *                   string.<br> If needle is not found it returns false.
7747
   */
7748 142
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 without function overloading: the native byte-based search is used
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // A negative offset counts from the end of the haystack. Translate it
      // into the equivalent absolute character position, so that the result
      // below is relative to the start of the full haystack and not to the
      // start of the slice that is actually searched.
      $offset = \max(0, (int)self::strlen($haystack, $encoding) + $offset);
    }

    // only look at the part of the haystack that starts at $offset
    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle inside the slice
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // Convert the byte position into a character position (0 stays 0) and
    // shift it by the characters skipped in front of the slice.
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
  }
7884
7885
  /**
7886
   * Finds the last occurrence of a character in a string within another.
7887
   *
7888
   * @link http://php.net/manual/en/function.mb-strrchr.php
7889
   *
7890
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
7891
   * @param string $needle        <p>The string to find in haystack</p>
7892
   * @param bool   $before_needle [optional] <p>
7893
   *                              Determines which portion of haystack
7894
   *                              this function returns.
7895
   *                              If set to true, it returns all of haystack
7896
   *                              from the beginning to the last occurrence of needle.
7897
   *                              If set to false, it returns all of haystack
7898
   *                              from the last occurrence of needle to the end,
7899
   *                              </p>
7900
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
7901
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
7902
   *
7903
   * @return string|false The portion of haystack or false if needle is not found.
7904
   */
7905 4
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters precede $needle inside $haystack
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without mbstring only UTF-8 is handled: warn, then keep trying.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // These encodings use the native function when the tail is requested.
    $nativeEncoding = 'CP850' === $encoding || 'ASCII' === $encoding;
    if ($before_needle === false && $nativeEncoding) {
      return \strrchr($haystack, $needle);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // fallback via vanilla php: only the first character of the needle is searched for
    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $lastPos = self::strrpos($haystack, $needle, null, $encoding);
    if ($lastPos === false) {
      return false;
    }

    // Either the part in front of the last occurrence, or everything from it onwards.
    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
7967
  /**
7968
   * Reverses characters order in the string.
7969
   *
7970
   * @param string $str <p>The input string.</p>
7971
   *
7972
   * @return string The string with characters in the reverse sequence.
7973
   */
7974 10
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Fast path for valid UTF-8: split the string into characters in a single
    // pass and reverse the list. This avoids one self::substr() call per
    // character, which made the function quadratic on long strings.
    if (\preg_match_all('/./us', $str, $chars) > 0) {
      return \implode('', \array_reverse($chars[0]));
    }

    // Fallback (e.g. the regex could not process the input): pick the
    // characters one by one, starting with the last one.
    $reversed = '';
    // self::strlen() may return false; treat that as "nothing to reverse"
    $i = (int)self::strlen($str);
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
7989
  /**
7990
   * Finds the last occurrence of a character in a string within another, case insensitive.
7991
   *
7992
   * @link http://php.net/manual/en/function.mb-strrichr.php
7993
   *
7994
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
7995
   * @param string $needle         <p>The string to find in haystack.</p>
7996
   * @param bool   $before_needle  [optional] <p>
7997
   *                               Determines which portion of haystack
7998
   *                               this function returns.
7999
   *                               If set to true, it returns all of haystack
8000
   *                               from the beginning to the last occurrence of needle.
8001
   *                               If set to false, it returns all of haystack
8002
   *                               from the last occurrence of needle to the end,
8003
   *                               </p>
8004
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
8005
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8006
   *
8007
   * @return string|false The portion of haystack or<br>false if needle is not found.
8008
   */
8009 3
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only encodings other than the two most common ones need normalizing.
    $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isCommonEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position
      // if invalid characters precede $needle inside $haystack
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // No capability check here: always goes through mb_strrichr()
    // (the original comment notes the symfony polyfill as the fallback).
    return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
  }
8026
  /**
8027
   * Find position of last occurrence of a case-insensitive string.
8028
   *
8029
   * @param string     $haystack  <p>The string to look in.</p>
8030
   * @param string|int $needle    <p>The string to look for.</p>
8031
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
8032
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8033
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8034
   *
8035
   * @return int|false
8036
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
8037
   *                   string.<br>If needle is not found, it returns false.
8038
   */
8039 4
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in an empty haystack.
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support an integer $needle:
    // a non-negative integer is converted via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without mbstring only UTF-8 is handled: warn, then keep trying.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1) mbstring -- its result is final, even if it is "not found"
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 2) intl -- "grapheme_strripos()" handles neither other encodings nor a negative offset
    $intlUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($intlUsable) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // 3) symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 4) pure ASCII input can be handled by the native byte-based function
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \strripos($haystack, $needle, $offset);
    }

    // 5) vanilla php: upper-case both operands, then do a case-sensitive search
    $upperHaystack = self::strtoupper($haystack, $encoding, false, null, true);
    $upperNeedle = self::strtoupper($needle, $encoding, false, null, true);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
8112
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is handled like <b>0</b> by the
   *                              "mb_strrpos()" based code paths.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If the haystack or the needle is empty, or if the needle is not found, it returns
   *                   false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // "mb_strrpos()" expects an int offset: never hand over the null default (TypeError with strict types on PHP 8)
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    // fallback for ascii only
    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return \strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // cut the haystack down to the part that has to be searched
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      // the cut happened at the end, so the found position needs no correction
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last occurrence ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position by measuring everything in front of it
    return (int)$offset + (int)self::strlen(\substr($haystack, 0, $pos));
  }
8227
8228
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>If set (together with / or $length), only that part of $str is examined.</p>
   * @param int|null $length [optional] <p>If set (together with / or $offset), only that part of $str is examined.</p>
   *
   * @return int
   *             The length of the matching initial segment,<br>or <strong>0</strong> if the (cut) string or the mask
   *             is empty, or if nothing matches.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the subject string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // "self::strlen()" may report false, but this method promises an int
    return (int)self::strlen($matches[0]);
  }
8255
8256
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if the haystack or the needle is empty, or if
   *                       the needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via regex: capture (non-greedy) everything in front of the first needle
    $match = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      // without a usable length the start of the needle cannot be addressed
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
8327
8328
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The folded and lowercased string.
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // lookup tables, filled on first use and kept for all following calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $folded = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
8374
8375
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator instead
          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // the transliteration failed (false would violate the string return type),
        // so continue with the generic conversion below
      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8437
8438
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed via "Normalizer::NFD" and all non-spacing marks (\p{Mn}) are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   *                The string without non-spacing marks,<br>or an empty string if the input could not be normalized.
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
      // the normalizer reported an error, so there is nothing that could be folded
      return '';
    }

    // "preg_replace()" returns null on error, the cast keeps the declared return type intact
    return (string)\preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
8450
8451
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator instead
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // the transliteration failed (false would violate the string return type),
        // so continue with the generic conversion below
      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
8513
8514
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to. If it is omitted, a string $from is removed
   *                              from $str and an array $from is used as replace-pairs.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    // "INF" marks the call without a third argument
    if (INF === $to) {
      if (\is_string($from)) {
        return \str_replace($from, '', $str);
      }

      return \strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);
    $fromCount = \count($fromChars);
    $toCount = \count($toChars);

    // drop the surplus chars of the longer side, so that both lists can be paired
    if ($fromCount !== $toCount) {
      $pairCount = $fromCount < $toCount ? $fromCount : $toCount;
      $fromChars = \array_slice($fromChars, 0, $pairCount);
      $toChars = \array_slice($toChars, 0, $pairCount);
    }

    $replacePairs = \array_combine($fromChars, $toChars);

    return \strtr($str, $replacePairs);
  }
8558
8559
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int The width as reported by "mb_strwidth()".
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // "UTF-8" and "CP850" are used as they are, everything else gets normalized first
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // always fallback via symfony polyfill
    return \mb_strwidth($str, $encoding);
  }
8583
8584
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string   $str       <p>The string being checked.</p>
   * @param int      $offset    <p>The first position used in str.</p>
   * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // nothing to cut from, or nothing requested
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // neither an offset nor a length: the whole string is requested
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only determined when one of the checks below can need it
    $charCount = ($offset || $length === null) ? self::strlen($str, $encoding) : 0;

    // e.g.: invalid chars + mbstring not installed
    if ($charCount === false) {
      return false;
    }

    // the offset equals the character count and no length was given
    if (!$length && $offset === $charCount) {
      return '';
    }

    // the offset lies behind the end of the string
    if ($offset && $offset > $charCount) {
      return false;
    }

    // without an explicit length, everything up to the end is taken
    $length = (int)($length ?? $charCount);

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" goes to the native function, as long as mbstring does not overload it
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return \substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: "grapheme_substr()" can't handle other encodings and can't handle negative offset
    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $viaIntl = \grapheme_substr($str, $offset, $length);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      $viaIconv = \iconv_substr($str, $offset, $length);
      if ($viaIconv !== false) {
        return $viaIconv;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // fallback via vanilla php:
    // split to array (and remove invalid characters), extract the relevant part and join it to a string again
    $chars = self::split($str);

    return \implode('', \array_slice($chars, $offset, $length));
  }
8718
8719
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is cut to that part first and $str2 is cut to the length of the
   * remaining $str1 afterwards.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // "self::strlen()" may report false, but "self::substr()" only accepts an int (or null) as length
      $str2Tmp = self::substr($str2, 0, (int)self::strlen($str1));
      if ($str2Tmp === false) {
        $str2Tmp = '';
      }
      $str2 = (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
8762
8763
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false This functions returns an integer or false if there isn't a string.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // reduce the haystack to the requested window first
    if ($offset || $length !== null) {

      if ($length === null) {
        $haystackLength = self::strlen($haystack);
        if ($haystackLength === false) {
          return false;
        }
        $length = (int)$haystackLength;
      }

      $windowEndsBeforeStart = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
      if (
          $windowEndsBeforeStart
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = $window === false ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match set is one occurrence
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return \count($occurrences);
  }
8856
8857
  /**
   * Returns the number of occurrences of $substring in the given string.
   * By default, the comparison is case-sensitive, but can be made insensitive
   * by setting $caseSensitive to false.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                              A non-boolean value is used as $encoding instead.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, <strong>0</strong> if one of the strings is empty.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $substring || '' === $str) {
      return 0;
    }

    // only a fallback to prevent BC in the api ...
    // ... a value that is neither true nor false is taken as the encoding
    $isBoolFlag = $caseSensitive === true || $caseSensitive === false;
    if (!$isBoolFlag) {
      $encoding = $caseSensitive;
    }

    if (!$caseSensitive) {
      // compare both sides in uppercase
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
8887
8888
  /**
   * Removes an prefix ($needle) from start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string
   *                The haystack without the prefix,<br>or the unchanged haystack if the needle is empty or if the
   *                haystack does not start with it.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" may report false, but "self::substr()" only accepts an int as offset
    $remainder = self::substr($haystack, (int)self::strlen($needle));
    if ($remainder === false) {
      return '';
    }

    return (string)$remainder;
  }
8916
8917
  /**
   * Get part of a string, processed in bytes (not in characters).
   *
   * @param string $str    <p>The string being checked.</p>
   * @param int    $offset <p>The first position used in str.</p>
   * @param int    $length [optional] <p>The maximum length of the returned string.
   *                       If omitted (null), everything from $offset to the end of the string is returned.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.<br>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> may be returned (native "substr()" behavior before PHP 8.0).
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      // (mb_substr() treats a null length as "until the end of the string")
      return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }

    // Before PHP 8.0 the native "substr()" does not accept null as length:
    // with strict types it throws a TypeError, without them null is read as 0.
    // Use the two-argument form so that "no length" always means "to the end".
    if ($length === null) {
      return \substr($str, $offset);
    }

    return \substr($str, $offset, $length);
  }
8942
8943
  /**
   * Find the byte position of the first occurrence of a string in another string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to search for in the haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false the numeric position of
   *                   the first occurrence of needle in the
   *                   haystack string. If
   *                   needle is not found, it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // With "mbstring.func_overload" active the "mb_" variant is used with an
    // 8-bit encoding; otherwise the native byte-wise function is called.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
8974
8975
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to strip the suffix from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything in front of the suffix.
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    // "self::substr()" may report false; that is mapped to an empty string.
    return $head === false ? '' : (string)$head;
  }
9003
9004
  /**
   * Strips the prefix ($needle) from the start of the string ($haystack), case-sensitive.
   *
   * @param string $haystack <p>The string to strip the prefix from.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack
   *                if it does not start with the needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    // "self::substr()" may report false; that is mapped to an empty string.
    return $remainder === false ? '' : (string)$remainder;
  }
9032
9033
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement: one entry per input string
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset: non-integer entries fall back to 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);

        // Fewer offsets than strings: use offset 0 for the rest, otherwise
        // "array_map()" would hand null offsets to the per-string call.
        $offset = \array_pad($offset, $num, 0);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        // No length given: keep it null so that every per-string call below
        // replaces up to the end of its string (as documented), instead of
        // degrading to a zero-length insert.
        $length = \array_fill(0, $num, null);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, once per input string
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string can only take a single replacement: use the first one
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // For an ASCII-only input, character offsets equal byte offsets,
    // so the native function can be used directly.
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into arrays of single characters
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $lengthTmp = self::strlen($str);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    // replace the selected characters and glue the string together again
    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
9139
9140
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), case-sensitive.
   *
   * @param string $haystack <p>The string to strip the suffix from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything in front of the suffix.
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    // "self::substr()" may report false; that is mapped to an empty string.
    return $head === false ? '' : (string)$head;
  }
9168
9169
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // Fast path: "lower ^ upper ^ str" swaps the case byte by byte. That only
    // works while both case variants occupy exactly the same bytes as the
    // input, so it is only used when all three byte lengths are equal.
    $byteLength = self::strlen_in_byte($str);
    if (
        self::strlen_in_byte($lower) === $byteLength
        &&
        self::strlen_in_byte($upper) === $byteLength
    ) {
      return (string)($lower ^ $upper ^ $str);
    }

    // Slow path: a case mapping changed the byte length (e.g. "ß" <-> "SS"),
    // the XOR above would produce broken bytes, so swap char by char instead.
    $swapped = '';
    $charCount = (int)\mb_strlen($str, $encoding);
    for ($i = 0; $i < $charCount; $i++) {
      $char = (string)\mb_substr($str, $i, 1, $encoding);
      $charLower = self::strtolower($char, $encoding);

      // already lower-case (or without case) => upper-case it, otherwise lower-case it
      $swapped .= ($char === $charLower) ? self::strtoupper($char, $encoding) : $charLower;
    }

    return $swapped;
  }
9196
9197
  /**
   * Checks whether "mbstring" or "iconv" functions are provided without the
   * matching PHP extension being loaded (i.e. by a userland polyfill).
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" exists although the "mbstring" extension is not loaded,
   *              or "iconv()" exists although the "iconv" extension is not loaded,
   *              <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringPolyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
    $iconvPolyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

    return $mbstringPolyfilled || $iconvPolyfilled;
  }
9220
9221
  /**
   * Replaces every tab character in the string with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string The string with all tabs replaced.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9231
9232
  /**
   * Title-cases the string: the first character of each word becomes
   * uppercase, all other characters become lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "UTF-8" and "CP850" are passed through as they are,
    // every other encoding name is normalized first.
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
  }
9250
9251
  /**
   * Deprecated camelCase alias: forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9268
9269
  /**
   * Deprecated camelCase alias: forwards the argument to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9284
9285
  /**
   * Deprecated camelCase alias: forwards the argument to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9300
9301
  /**
   * Deprecated camelCase alias: forwards the argument to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9316
9317
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned unchanged. Otherwise the string is cleaned
   * via "UTF8::clean()" and every remaining non-ASCII character is replaced
   * through lookup tables loaded with "self::getData()"; characters without
   * a table entry become $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // lookup tables that were already loaded, keyed by "bank" (code point >> 8);
    // the static variable keeps them between calls
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: in that
        // case keep the current string and continue with the lookup tables.
        if (\is_string($transliterated) === true) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // start every character without a code point,
      // so a value from the previous character is never reused
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // lead bytes 254 and 255 do not start any sequence handled above
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into "banks" of 256 code points, loaded on demand
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // no data for this bank: remember that as an empty table
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the code point inside its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference that "foreach" left on the last array element
    unset($c);

    return \implode('', $chars);
  }
9478
9479
  /**
   * Interprets a value as boolean, based on its string representation.
   *
   * The words "true", "1", "on", "yes" (any letter case) give true and
   * "false", "0", "off", "no" give false. Other numeric strings are true
   * when they are greater than zero. Everything else is true when the
   * value is still truthy after "UTF8::trim()".
   *
   * @param mixed $str <p>The value to interpret; it is cast to string first.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    // init
    $value = (string)$str;

    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keyword = \strtolower($value);

    if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      return (((float)$value + 0) > 0);
    }

    return (bool)self::trim($value);
  }
9518
9519
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (keys are kept, nested arrays
   * are handled recursively); strings are passed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $input = (string)$str;

    return $input === '' ? '' : self::utf8_decode($input);
  }
9543
9544
  /**
   * Alias: forwards the argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9557
9558
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Byte sequences that look like UTF-8 (lead byte followed by the matching number of
   * continuation bytes) are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "to_utf8_convert_helper()"; the original
   * string is assumed to be either WINDOWS-1252 or ISO-8859.</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded, html-entities only on request.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        if (($lead & "\xC0") === "\x80") {
          // a continuation byte without a lead byte - needs conversion
          $result .= self::to_utf8_convert_helper($lead);
        } else {
          // plain 7-bit byte - it doesn't need conversion
          $result .= $lead;
        }

        $pos++;

        continue;
      }

      // number of continuation bytes a UTF-8 sequence with this lead byte needs
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; bytes beyond the end of the string count as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $tailLength > 0;
      for ($step = 1; $looksLikeUtf8 && $step <= $tailLength; $step++) {
        $next = $pos + $step >= $byteCount ? "\x00" : $str[$pos + $step];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $result .= $sequence;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert the lead byte only
        $result .= self::to_utf8_convert_helper($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
9667
9668
  /**
9669
   * @param int|string $input
9670
   *
9671
   * @return string
9672
   */
9673 30
  private static function to_utf8_convert_helper($input): string
9674
  {
9675
    // init
9676 30
    $buf = '';
9677
9678 30
    if (self::$ORD === null) {
9679 1
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9680
    }
9681
9682 30
    if (self::$CHR === null) {
9683 1
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9684
    }
9685
9686 30
    if (self::$WIN1252_TO_UTF8 === null) {
9687 1
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9688
    }
9689
9690 30
    $ordC1 = self::$ORD[$input];
9691 30
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
9692 30
      $buf .= self::$WIN1252_TO_UTF8[$ordC1];
9693
    } else {
9694 2
      $cc1 = self::$CHR[$ordC1 / 64] | "\xC0";
9695 2
      $cc2 = ((string)$input & "\x3F") | "\x80";
0 ignored issues
show
Bug introduced by
Are you sure you want to use the bitwise | or did you mean ||?
Loading history...
Bug introduced by
Are you sure you want to use the bitwise & or did you mean &&?
Loading history...
9696 2
      $buf .= $cc1 . $cc2;
9697
    }
9698
9699 30
    return $buf;
9700
  }
9701
9702
  /**
9703
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
9704
   *
9705
   * INFO: This is slower than "trim()"
9706
   *
9707
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
9708
   * but the check for ASCII (7-Bit) costs more time than we can save here.
9709
   *
9710
   * @param string $str   <p>The string to be trimmed</p>
9711
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
9712
   *
9713
   * @return string The trimmed string.
9714
   */
9715 214
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "No character list" is detected explicitly instead of via "!$chars":
    // a loose check would also treat the list "0" as "not set" and strip
    // whitespace instead of the requested character.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === [];

    if ($useDefaultChars) {
      // default: strip Unicode separators (\pZ) and "other" characters (\pC) on both ends
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // "preg_quote()" only accepts strings under strict_types, so other scalars are cast first
      if (\is_scalar($chars)) {
        $chars = (string)$chars;
      }

      // quote the list (including the "/" delimiter), so it is safe inside the character class
      $quotedChars = \preg_quote($chars, '/');
      $pattern = "^[$quotedChars]+|[$quotedChars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
9731
9732
  /**
9733
   * Makes string's first char uppercase.
9734
   *
9735
   * @param string $str       <p>The input string.</p>
9736
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9737
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9738
   *
9739
   * @return string The resulting string.
9740
   */
9741 76
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // "mb_strpos()" and "iconv_strpos()" return a wrong position if invalid
    // characters are found in the string, so they are removed first on request.
    if (true === $cleanUtf8) {
      $str = self::clean($str);
    }

    // everything after the first character stays untouched
    $tail = self::substr($str, 1, null, $encoding);

    // only the first character is converted to uppercase
    $head = self::strtoupper(
        (string)self::substr($str, 0, 1, $encoding),
        $encoding,
        $cleanUtf8
    );

    // "substr()" may report false; that is treated like an empty remainder
    return $head . (false === $tail ? '' : $tail);
  }
9762
9763
  /**
9764
   * alias for "UTF8::ucfirst()"
9765
   *
9766
   * @see UTF8::ucfirst()
9767
   *
9768
   * @param string $str
9769
   * @param string $encoding
9770
   * @param bool   $cleanUtf8
9771
   *
9772
   * @return string
9773
   */
9774 1
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // pure alias: all arguments are forwarded unchanged to "UTF8::ucfirst()"
    $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8);

    return $upperFirst;
  }
9778
9779
  /**
9780
   * Uppercase for all words in the string.
9781
   *
9782
   * @param string   $str        <p>The input string.</p>
9783
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
9784
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9785
   * @param string   $encoding   [optional] <p>Set the charset.</p>
9786
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
9787
   *
9788
   * @return string
9789
   */
9790 9
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // strict comparison: the string "0" is valid input and must not be handled as "empty"
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // The native function is only usable when neither extra word-chars nor exceptions
    // were given; compared against '' so that a value of "0" still counts as "given".
    $usePhpDefaultFunctions = '' === $charlist . \implode('', $exceptions);

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = $exceptions !== [];
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only; a word like "0" has to stay in the result
      if ('' === (string)$word) {
        continue;
      }

      // words listed in $exceptions are kept exactly as they are
      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
9847
9848
  /**
9849
   * Multi decode html entity & fix urlencoded-win1252-chars.
9850
   *
9851
   * e.g:
9852
   * 'test+test'                     => 'test test'
9853
   * 'D&#252;sseldorf'               => 'Düsseldorf'
9854
   * 'D%FCsseldorf'                  => 'Düsseldorf'
9855
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
9856
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
9857
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
9858
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
9859
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
9860
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
9861
   *
9862
   * @param string $str          <p>The input string.</p>
9863
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
9864
   *
9865
   * @return string
9866
   */
9867 2
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal HTML entities,
    // so that the entity decoding in the loop below can resolve them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // one pass = to UTF-8 -> decode html entities -> urldecode -> fix simple UTF-8 errors;
    // with $multi_decode the passes are repeated until the string no longer changes
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
9896
9897
  /**
9898
   * Return an array with "urlencoded"-win1252 -> UTF-8
9899
   *
9900
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
9901
   *
9902
   * @return string[]
9903
   */
9904 2
  public static function urldecode_fix_win1252_chars(): array
  {
    // Lookup table: percent-encoded Windows-1252 byte ("%20" - "%FF") => UTF-8 character.
    //
    // - "%80" is the Euro sign in Windows-1252 (it was mapped to a backtick before,
    //   which is already the value of "%60").
    // - Invisible characters (DEL, no-break space, soft hyphen) are written as escape
    //   sequences, so they cannot get lost when the file is edited or copied.
    // - "%81", "%8D", "%8F", "%90" and "%9D" are not assigned in Windows-1252
    //   and therefore map to an empty string.
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DEL
        '%80' => '€',
        '%81' => '', // not assigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // not assigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // not assigned in Windows-1252
        '%90' => '', // not assigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // not assigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\u{A0}", // no-break space
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\u{AD}", // soft hyphen
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
10133
10134
  /**
10135
   * Decodes an UTF-8 string to ISO-8859-1.
10136
   *
10137
   * @param string $str <p>The input string.</p>
10138
   * @param bool   $keepUtf8Chars
10139
   *
10140
   * @return string
10141
   */
10142 16
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists for "str_replace()", built once per request
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      // "getData()" can also return false; the property is declared as "null|array"
      // and "array_keys()" / "array_values()" only accept arrays.
      if (!\is_array(self::$WIN1252_TO_UTF8)) {
        $win1252ToUtf8 = self::getData('win1252_to_utf8');
        self::$WIN1252_TO_UTF8 = \is_array($win1252ToUtf8) ? $win1252ToUtf8 : [];
      }

      // NOTE(review): the keys of this map appear to be integer code points (it is
      // indexed by ord values elsewhere in this class) — confirm that replacing
      // their string form in $str is intended.
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (!\is_array(self::$ORD)) {
      $ordData = self::getData('ord');
      self::$ORD = \is_array($ordData) ? $ordData : [];
    }

    if (!\is_array(self::$CHR)) {
      $chrData = self::getData('chr');
      self::$CHR = \is_array($chrData) ? $chrData : [];
    }

    $noCharFound = '?';

    // The string is rewritten in place: $i reads the UTF-8 bytes, $j writes the
    // single-byte result ($j never overtakes $i).
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // the high nibble of the current byte selects the sequence length
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence at the end of the input: there is no second byte to read
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one byte more than the 3-byte case,
          // the fall-through is intended
          ++$i;
        case "\xE0":
          // lead byte of a 3-byte sequence: not representable in a single byte
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // every other byte is copied unchanged
          $str[$j] = $str[$i];
      }
    }

    // cut off the leftover bytes behind the write position (false is handled like '')
    $return = (string)self::substr_in_byte($str, 0, $j);

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
10218
10219
  /**
10220
   * Encodes an ISO-8859-1 string to UTF-8.
10221
   *
10222
   * @param string $str <p>The input string.</p>
10223
   *
10224
   * @return string
10225
   */
10226 14
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $str = \utf8_encode($str);

    // the polyfill may return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($str === false) {
      return '';
    }

    // without the byte 0xC2 in the encoded string the replacement below is skipped
    if (false === \strpos($str, "\xC2")) {
      return $str;
    }

    // search / replace lists for "str_replace()", built once per request
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {

      // "getData()" can also return false; the property is declared as "null|array"
      // and "array_keys()" / "array_values()" only accept arrays.
      if (!\is_array(self::$WIN1252_TO_UTF8)) {
        $win1252ToUtf8 = self::getData('win1252_to_utf8');
        self::$WIN1252_TO_UTF8 = \is_array($win1252ToUtf8) ? $win1252ToUtf8 : [];
      }

      // NOTE(review): the keys of this map appear to be integer code points (it is
      // indexed by ord values elsewhere in this class) — confirm that replacing
      // their string form in $str is intended.
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
  }
10259
10260
  /**
10261
   * fix -> utf8-win1252 chars
10262
   *
10263
   * @param string $str <p>The input string.</p>
10264
   *
10265
   * @return string
10266
   *
10267
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
10268
   */
10269 2
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // deprecated alias: the work is done by "UTF8::fix_simple_utf8()"
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10273
10274
  /**
10275
   * Returns an array with all utf8 whitespace characters.
10276
   *
10277
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
10278
   *
10279
   * @author: Derek E. [email protected]
10280
   *
10281
   * @return string[]
10282
   *                 An array with all known whitespace characters as values and the type of whitespace as keys
10283
   *                 as defined in above URL.
10284
   */
10285 2
  public static function whitespace_table(): array
  {
    // read-only accessor for the static whitespace table of this class
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10289
10290
  /**
10291
   * Limit the number of words in a string.
10292
   *
10293
   * @param string $str      <p>The input string.</p>
10294
   * @param int    $limit    <p>The limit of words as integer.</p>
10295
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
10296
   *
10297
   * @return string
10298
   */
10299 2
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // nothing to return for an empty string or a limit below one word
    if ('' === $str || $limit < 1) {
      return '';
    }

    // leading whitespace, followed by 1 to $limit runs of "word + trailing whitespace"
    $wordsPattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($wordsPattern, $str, $matches);

    // no match at all -> the input is returned unchanged
    if (!isset($matches[0])) {
      return $str;
    }

    // the match already covers the whole input -> nothing was cut off
    $limited = $matches[0];
    if (self::strlen($str) === self::strlen($limited)) {
      return $str;
    }

    return self::rtrim($limited) . $strAddOn;
  }
10321
10322
  /**
10323
   * Wraps a string to a given number of characters
10324
   *
10325
   * @link  http://php.net/manual/en/function.wordwrap.php
10326
   *
10327
   * @param string $str   <p>The input string.</p>
10328
   * @param int    $width [optional] <p>The column width.</p>
10329
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
10330
   * @param bool   $cut   [optional] <p>
10331
   *                      If the cut is set to true, the string is
10332
   *                      always wrapped at or before the specified width. So if you have
10333
   *                      a word that is larger than the given width, it is broken apart.
10334
   *                      </p>
10335
   *
10336
   * @return string The given string wrapped at the specified column.
10337
   */
10338 10
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    // Step 1: build a single-byte "mask" of the input with one byte per character:
    // " " for a space, "#" for an existing $break and "?" for everything else.
    // $chars keeps the real characters in the same order.
    $mask = '';
    $chars = [];

    // "explode()" cannot fail here, because an empty $break was rejected above
    foreach (\explode($break, $str) as $index => $segment) {

      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // Step 2: let the native function wrap the mask, using "#" as the break marker
    $mask = \wordwrap($mask, $width, '#', $cut);

    // Step 3: rebuild the result from $chars, following the "#" markers of the wrapped mask
    $strReturn = '';
    $j = 0;         // position in $chars
    $b = $i = -1;   // $b: position of the current marker, $i: position in the mask

    while (false !== $b = self::strpos($mask, '#', $b + 1)) {
      // copy all characters in front of the marker
      for (++$i; $i < $b; ++$i) {
        $strReturn .= $chars[$j];
        unset($chars[$j++]);
      }

      // drop the space / break that sits at the marker position in the input
      if (isset($chars[$j]) && ($break === $chars[$j] || ' ' === $chars[$j])) {
        unset($chars[$j++]);
      }

      $strReturn .= $break;
    }

    // whatever is left behind the last marker
    return $strReturn . \implode('', $chars);
  }
10392
10393
  /**
10394
   * Line-Wrap the string after $limit, but also after the next word.
10395
   *
10396
   * @param string $str
10397
   * @param int    $limit
10398
   *
10399
   * @return string
10400
   */
10401 1
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    // split on every kind of line break (CRLF, CR, LF)
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // the multibyte-aware "UTF8::wordwrap()" is used here: the native "wordwrap()"
      // counts bytes, not characters, and therefore wraps UTF-8 text too early
      $wrapped .= self::wordwrap($line, $limit);

      // every input line ends with a "\n" in the result (also the last one)
      $wrapped .= "\n";
    }

    return $wrapped;
  }
10417
10418
  /**
10419
   * Returns an array of Unicode White Space characters.
10420
   *
10421
   * @return string[] An array with numeric code point as key and White Space Character as value.
10422
   */
10423 2
  public static function ws(): array
  {
    // read-only accessor for the static white space list of this class
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
10427
10428
}
10429