Passed
Push — master ( 267d99...91fcec )
by Lars
04:31
created

UTF8::str_titleize_for_humans()   B

Complexity

Conditions 5
Paths 2

Size

Total Lines 127
Code Lines 75

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 50
CRAP Score 5.0014

Importance

Changes 0
Metric Value
cc 5
eloc 75
nc 2
nop 3
dl 0
loc 127
ccs 50
cts 52
cp 0.9615
crap 5.0014
rs 8.2343
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Byte order marks mapped to their length in bytes.
   *
   * Every BOM is listed twice: once as the raw byte sequence and once as the
   * characters those bytes become when they were misread as "WINDOWS-1252"
   * (one such character may take more than one byte).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // UTF-8 BOM as "WINDOWS-1252" (the three characters "i-diaeresis", "right guillemet", "inverted question mark");
      // written with escapes so that tools which strip BOM-like text cannot silently empty this key
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6,
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters of this key are kept exactly as found - confirm they are the intended bytes
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): the two trailing characters of this key are kept exactly as found - confirm they are the intended bytes
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Creating an instance runs the environment detection of checkForSupport().
   */
  public function __construct()
  {
    // the detection result is stored statically, so instances carry no state of their own
    self::checkForSupport();
  }
233
234
  /**
   * Return the character at the given position, similar to $str[1] but per character.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string A single multi-byte character, or an empty string for
   *                empty input or a negative position.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string; negative positions are rejected here
    if ($pos < 0 || '' === $str) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Prepend the UTF-8 BOM to the string and return the result.
   *
   * INFO: If string_has_bom() already reports a BOM, the input is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string starting with a BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    // only an explicit "false" from the check leads to prepending
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has the documented default: a required parameter after
    // optional ones is deprecated since PHP 8.0 and made the other defaults unusable.

    // Only the pad direction depends on which sides were requested ...
    if ($left && $right) {
      $type = STR_PAD_BOTH;
    } elseif ($left) {
      $type = STR_PAD_LEFT;
    } elseif ($right) {
      $type = STR_PAD_RIGHT;
    } else {
      $type = STR_PAD_BOTH;
    }

    // ... the target length is always "both paddings + current length"
    // (the side that was not requested contributes 0).
    $length = ($left + $right) + self::strlen($str, $encoding);

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Changes the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other
   *                     value is treated like <strong>CASE_LOWER</strong>.</p>
   *
   * @return string[] An array with its keys lower- or uppercased.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything that is not explicitly "upper" (including unknown values) means "lower"
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $oldKey => $item) {
      $newKey = $toUpper ? self::strtoupper($oldKey) : self::strtolower($oldKey);

      // keys that collapse to the same spelling overwrite each other, the last one wins
      $result[$newKey] = $item;
    }

    return $result;
  }
338
339
  /**
   * Returns the substring between $start and $end, or an empty string if
   * either delimiter is not found or nothing lies between them. An optional
   * offset may be supplied from which to begin the search for $start.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // the wanted part begins right after the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if ($endPos === false) {
      return '';
    }

    // the end delimiter directly follows the start delimiter: nothing in between
    if ($endPos === $from) {
      return '';
    }

    $found = self::substr($str, $from, $endPos - $from, $encoding);

    return $found === false ? '' : $found;
  }
377
378
  /**
   * Convert a string of binary digits into a string.
   *
   * The digits are converted to hexadecimal and then packed into bytes.
   *
   * @param mixed $bin 1|0
   *
   * @return string Empty string if $bin has no first element / character
   *                or if its value converts to zero.
   */
  public static function binary_to_str($bin): string
  {
    if (isset($bin[0]) === false) {
      return '';
    }

    $hex = \base_convert($bin, 2, 16);

    // a value of zero yields no output at all
    return $hex === '0' ? '' : \pack('H*', $hex);
  }
398
399
  /**
   * Returns the UTF-8 Byte Order Mark character.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    // the three bytes EF BB BF
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
410
411
  /**
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    // pure delegation, kept as an alternative name
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // a substring of length 1; the cast turns a non-string result into a string
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
   * Returns an array consisting of the characters in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    // chunks of length 1 are the single characters
    $characters = self::str_split($str, 1);

    return $characters;
  }
451
452
  /**
   * Detects which UTF-8 related features the server environment offers and
   * stores the result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       The detection runs only once; later calls return immediately.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // default: no transliterator ids, replaced below when they can be queried
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are remembered per "code point + encoding" for later calls.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // results of earlier calls
    static $memo = [];

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // warn when the target encoding is not one of the three listed here and mbstring is missing
    $isListedEncoding = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if ($isListedEncoding === false && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $memoKey = $code_point . $encoding;
    if (isset($memo[$memoKey]) === true) {
      return $memo[$memoKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      // lazy-load the lookup table
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);

    } else {

      // lazy-load the lookup table
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      // build the UTF-8 byte sequence by hand: a lead byte that depends on the
      // code point range, followed by continuation bytes carrying 6 bits each
      $code_point = (int)$code_point;
      if ($code_point <= 0x7F) {
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }

    }

    // the character was produced as UTF-8, convert it if something else was requested
    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    $memo[$memoKey] = $chr;

    return $chr;
  }
601
602
  /**
   * Applies a callback to every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    // split into characters first, then hand each one to the callback
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Generates an array with the byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    $characters = self::split($str);

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // without function overloading the native strlen() can be mapped directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \array_map('\strlen', $characters);
    }

    // with overloading active, go through the dedicated byte-length helper instead
    return \array_map(
        function ($character) {
          return UTF8::strlen_in_byte($character);
        },
        $characters
    );
  }
652
653
  /**
   * Get the decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read in total; the payload
   * bits of all of them are combined into one number.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $value = self::ord($char[0]);

    // 0xxxxxxx: highest bit not set, the value can be used as it is
    if (!($value & 0x80)) {
      return $value;
    }

    // a first byte matching none of the patterns below is treated as a single byte
    $length = 1;

    if (($value & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value &= ~0xc0;
    } elseif (($value & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value &= ~0xe0;
    } elseif (($value & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value &= ~0xf0;
    }

    // 10xxxxxx: append the bits of every following byte, with its highest bit cleared
    for ($offset = 1; $offset < $length; $offset++) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
691
692
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx, or an empty string for empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ('' === $char) {
      return '';
    }

    // the NUL entity is handed on as an empty string
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
   * Alias for "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    // pure delegation, kept as an alternative name
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) placed between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    // implode() puts $end between the chunks
    return \implode($end, $chunks);
  }
740
741
  /**
   * Accepts a string and removes all non-UTF-8 characters from it, plus optional extras.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove the diamond
   *                                              question mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
   *                                              invisible characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, groups 2 and 3 capture stray bytes
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keeping only group 1 drops every stray byte
    $cleaned = (string)\preg_replace($utf8Pattern, '$1', $str);

    // the optional steps run in this fixed order
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
   * Clean up a string so that only printable UTF-8 chars remain, and fix its UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;

    if ('' === $input) {
      return '';
    }

    // fix ISO <-> UTF-8 errors
    $fixed = self::fix_simple_utf8($input);

    // remove all non UTF-8 symbols and ...
    return self::clean(
        $fixed,
        true,  // ... remove the BOM
        true,  // ... normalize whitespace chars
        false, // ... leave MS Word chars alone
        true,  // ... but keep non-breaking-spaces
        true,  // ... remove the diamond question mark (�)
        true   // ... remove invisible characters (e.g. "\0")
    );
  }
846
847
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If true, will return code points in U+xxxx format,
   *                                    by default code points are returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a plain string is first broken up into its characters
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if ($codePoints === []) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    // U+xxxx notation was requested
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
891
892
  /**
   * Trims the string and replaces consecutive whitespace characters with a
   * single space. This includes tabs and newline characters, as well as
   * multibyte whitespace such as the thin space and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    // first squeeze every whitespace run into one space ...
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    // ... then cut off what is left at both ends
    return self::trim($condensed);
  }
907
908
  /**
   * Returns how often each character is used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] An associative array with the characters as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
921
922
  /**
   * Remove css media-queries.
   *
   * @param string $str
   *
   * @return string The input without the matched "@media ... { ... }" blocks.
   */
  public static function css_stripe_media_queries(string $str): string
  {
    // modifiers: multi-line, case-insensitive, dot matches newline, ungreedy
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    // preg_replace() gives null on error, the cast turns that into an empty string
    return (string)$stripped;
  }
937
938
  /**
   * Checks whether the "ctype" extension is loaded on the server.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    // build the numeric HTML entity for the value and let the entity decoder resolve it
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
   * Decodes a MIME header field.
   *
   * iconv is used when it is available, otherwise mb_decode_mimeheader().
   *
   * @param string $str
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] === true) {
      // keep decoding even if parts of the header are malformed
      return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    // strict comparison, consistent with the encoding check at the top of this method
    if ($encoding !== 'UTF-8') {
      $str = self::encode($encoding, $str);
    }

    return \mb_decode_mimeheader($str);
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()" this function also tries to repair broken / double encoded input,
   *        so it can be called on a string that already is UTF-8 without messing it up.
   *
   * Besides real charsets, the pseudo-encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled
   * for both $toEncoding and $fromEncoding.
   *
   * @param string $toEncoding                  <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str                         <p>The input string</p>
   * @param bool   $autodetectFromEncoding      [optional] <p>When true (default) the source encoding is detected
   *                                            from $str and a detected value replaces $fromEncoding; if the
   *                                            detection fails the result of self::to_utf8() is returned.<br>
   *                                            When false, $fromEncoding is used as given (the detection still
   *                                            runs if $fromEncoding is empty).</p>
   * @param string $fromEncoding                [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            A empty string will trigger the autodetect anyway.</p>
   *
   * @return string The converted string, or the unchanged input when nothing has to be / can be converted.
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // Source and target are known and identical: nothing to do.
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    // One support check is enough for the whole method.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Pseudo-encodings: a pseudo *target* returns immediately, a pseudo
    // *source* is decoded first and then handled like an unknown encoding.
    if ($toEncoding === 'JSON') {
      return self::json_encode($str);
    }
    if ($fromEncoding === 'JSON') {
      $str = self::json_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
      return base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
      $str = base64_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'HTML-ENTITIES') {
      return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = '';
    }

    $fromEncodingDetected = false;
    if ($autodetectFromEncoding === true || !$fromEncoding) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

    if ($fromEncodingDetected !== false) {
      // A detected encoding always wins over the one passed by the caller.
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      // NOTE(review): this returns UTF-8 regardless of $toEncoding - confirm that is wanted.
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    $fromIsWindows1252 = ($fromEncoding === 'WINDOWS-1252');

    // Dedicated converters for the common single-byte <-> UTF-8 cases.
    if (
        $toEncoding === 'UTF-8'
        &&
        ($fromIsWindows1252 || $fromEncoding === 'ISO-8859-1')
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        ($fromIsWindows1252 || $fromEncoding === 'UTF-8')
    ) {
      return self::to_iso8859($str);
    }

    // Only warn; the iconv fallback below is still attempted.
    if (
        !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $strEncoded = \mb_convert_encoding(
          $str,
          $toEncoding,
          $fromEncoding
      );

      // A falsy result (false / '' / '0') falls through to iconv.
      if ($strEncoded) {
        return $strEncoded;
      }
    }

    $return = \iconv($fromEncoding, $toEncoding, $str);
    if ($return !== false) {
      return $return;
    }

    // Nothing was able to convert the string: hand back the input.
    return $str;
  }
1141
1142
  /**
   * Encodes a string as a MIME header field value via iconv_mime_encode().
   *
   * The field name passed to iconv_mime_encode() is an empty string.
   *
   * @param string $str              <p>The value to encode.</p>
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding (the "scheme" option).</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed (the "line-break-chars" option).</p>
   * @param int    $indent           [optional] <p>Set the max line length (the "line-length" option).</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // "UTF-8" and "CP850" are used as-is, any other name is normalized first.
    if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1185
1186
  /**
   * Creates an extract from a text; if the search-string is found, the extract is cut
   * so that the match sits roughly in its middle.
   *
   * Cuts are made at a space or a dot, and the skipped parts are replaced by $replacerForSkippedText.
   * Without a search-string the extract simply starts at the beginning of the text.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string (matched case-insensitive).</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters that are stripped from the cut edges of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // NOTE(review): every "\min(strpos ' ', strpos '.')" below yields false (=> 0 after the
    // int-cast) as soon as ONE of the two characters is not found, because min() treats
    // false as the smaller value. Existing results depend on that, so it is left untouched.

    // Strict check instead of empty(), so that the search term "0" is not treated as "no search".
    if ('' === $search) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);

    // Offset at which the extract should start so that the match ends up centered.
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // Move the start back to the last space / dot in front of that offset.
    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordPos && $halfSide > 0) {
      // The extract starts somewhere inside the text.
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // No usable end found: take everything from the start position on.
        // The length is measured in $encoding, like every other length in this method.
        $strSub = self::substr($str, $pos_start, self::strlen($str, $encoding), $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {
      // Match at the very beginning (or no match): the extract starts at the beginning of the text.
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1307
1308
  /**
   * Reads entire file into a string and (optionally) converts the content to UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename             <p>
   *                                            Name of the file to read. It is passed through
   *                                            filter_var(..., FILTER_SANITIZE_STRING) before use.
   *                                            </p>
   * @param bool          $use_include_path     [optional] <p>
   *                                            Forwarded to the native file_get_contents():
   *                                            search the file in the include path.
   *                                            </p>
   * @param resource|null $context              [optional] <p>
   *                                            A valid context resource created with
   *                                            stream_context_create. If null and $timeout is
   *                                            non-zero, a context with a "http" timeout is created.
   *                                            </p>
   * @param int|null      $offset               [optional] <p>
   *                                            The offset where the reading starts (null === 0).
   *                                            </p>
   * @param int|null      $maxLength            [optional] <p>
   *                                            Maximum length of data read. The default is to read until end
   *                                            of file is reached.
   *                                            </p>
   * @param int           $timeout              <p>The time in seconds for the timeout (only used when no
   *                                            $context is given).</p>
   *
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
   *                                            files, because they used non default utf-8 chars. Binary files like
   *                                            images or pdf will not be converted.</p>
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            A empty string will trigger the autodetect anyway.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and alters
    // file names that contain quotes or "<" - worth revisiting.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Without a caller-supplied context, create one that carries the http timeout.
    if ($timeout && $context === null) {
      $httpOptions = ['timeout' => $timeout];
      $context = \stream_context_create(['http' => $httpOptions]);
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed on when it really is an integer.
    $data = \is_int($maxLength) === true
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Binary content that is neither UTF-16 nor UTF-32 is handed back untouched.
    $isPlainBinary = self::is_binary($data, true) === true
                     &&
                     self::is_utf16($data, false) === false
                     &&
                     self::is_utf32($data, false) === false;

    if (!$isPlainBinary) {
      $data = self::encode('UTF-8', $data, false, $fromEncoding);
      $data = self::cleanup($data);
    }

    return $data;
  }
1406
1407
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with the native file_get_contents() and the
   * content is then checked by self::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    if ($content !== false) {
      return self::string_has_bom($content);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1426
1427
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are walked recursively (element by element / property by property),
   * strings are normalized, every other type is returned unchanged.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to \Normalizer (default 4, marked "n::NFC" in the signature).</p>
   * @param string $leading_combining  <p>Prefix that is put in front of a string starting with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (\is_object($var)) {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!\is_string($var)) {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Only strings with non-ASCII content need any normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder, so that the isset($n[0]) check below succeeds.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // An unusable normalizer result means the input was not valid UTF-8: re-encode it.
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1490
1491
  /**
   * "filter_input()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when the caller actually passes a fourth argument.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Decide by the number of passed arguments, not by the value of $options.
    $raw = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($raw);
  }
1529
1530
  /**
   * "filter_input_array()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets external variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
   *                          to the filter) and "options" (options that apply to the filter).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   *                          <p>
   *                          Only forwarded (together with $add_empty) when the caller passes at least two arguments.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
   *               set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // Decide by the number of passed arguments, not by the value of $definition.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1575
1576
  /**
   * "filter_var()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Filters a variable with a specified filter
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>
   *                        Value to filter.
   *                        </p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        Only forwarded when the caller actually passes a third argument.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filter that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // for filter that only accept flags, you can also pass as an array
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
   *                        // callback validate filter
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Decide by the number of passed arguments, not by the value of $options.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1644
1645
  /**
   * "filter_var_array()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets multiple variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter, flags and options.
   *                          If the value is an array, valid keys are "filter" (the filter type),
   *                          "flags" (flags that apply to the filter) and "options" (options that
   *                          apply to the filter).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   *                          <p>
   *                          Only forwarded (together with $add_empty) when the caller passes at least two arguments.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // Decide by the number of passed arguments, not by the value of $definition.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1687
1688
  /**
   * Tells whether the "finfo" class exists on the server.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function finfo_loaded(): bool
  {
    $hasFinfo = \class_exists('finfo');

    return $hasFinfo;
  }
1698
1699
  /**
   * Returns the leading $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>How many characters to take from the start.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   *                An empty string if $n is not positive or if the substring could not be extracted.
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      $head = self::substr($str, 0, $n, $encoding);

      // "UTF8::substr()" signals failure with false; map that to an empty string
      return $head === false ? '' : $head;
    }

    return '';
  }
1721
1722
  /**
   * Checks that the string does not contain more characters than the given limit.
   *
   * @param string $str      The string to measure.
   * @param int    $box_size The maximum number of characters allowed.
   *
   * @return bool true if the character count is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1734
1735
  /**
   * Applies the case-folding replacement tables to a string.
   *
   * @param string $str
   * @param bool   $useLower     <p>If exactly true, "upper" entries are replaced by their "lower" counterparts;
   *                             otherwise "lower" entries are replaced by their "upper" counterparts.</p>
   * @param bool   $fullCaseFold <p>Additionally apply the "caseFolding_full" data table.</p>
   *
   * @return string
   */
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    $toLower = ($useLower === true);

    // first pass: the common case-folding table
    if ($toLower) {
      $search = self::$COMMON_CASE_FOLD['upper'];
      $replace = self::$COMMON_CASE_FOLD['lower'];
    } else {
      $search = self::$COMMON_CASE_FOLD['lower'];
      $replace = self::$COMMON_CASE_FOLD['upper'];
    }

    $str = (string)\str_replace($search, $replace, $str);

    if (!$fullCaseFold) {
      return $str;
    }

    // second pass: the full case-folding table, loaded once per request
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // index 0 is the search side when lowering, index 1 when raising
    $fromIndex = $toLower ? 0 : 1;
    $toIndex = $toLower ? 1 : 0;

    return (string)\str_replace($FULL_CASE_FOLD[$fromIndex], $FULL_CASE_FOLD[$toIndex], $str);
  }
1777
1778
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   *                The repaired string, or the unchanged input if the "utf8_fix" replacement table
   *                could not be loaded.
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      $fixTable = self::$BROKEN_UTF8_FIX;
      if ($fixTable === null) {
        $fixTable = self::getData('utf8_fix');

        // "getData()" returns false on error: only store real arrays, so the
        // property keeps its declared "null|array" type.
        if (\is_array($fixTable)) {
          self::$BROKEN_UTF8_FIX = $fixTable;
        }
      }

      // Without a replacement table there is nothing to fix; bail out before
      // array_keys() / array_values() would receive a non-array (TypeError
      // under strict_types). The caches stay empty, so a later call retries.
      if (!\is_array($fixTable)) {
        return $str;
      }

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixTable);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixTable);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1812
1813
  /**
   * Repairs a string that was UTF-8 encoded twice (or more often).
   *
   * @param string[]|string $str A single string, or an array of strings (handled recursively).
   *
   * @return string[]|string
   *                          The fixed input-"array" (keys preserved) or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      // array_map() keeps the keys when it is given exactly one array
      return \array_map([self::class, __FUNCTION__], $str);
    }

    $current = (string)$str;
    if ($current === '') {
      return $current;
    }

    // decode + re-encode until another round no longer changes the string
    do {
      $previous = $current;
      $current = self::to_utf8(
          self::utf8_decode($current, true)
      );
    } while ($previous !== $current);

    return $current;
  }
1843
1844
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is flagged, its direction code is consulted first; when that
   * code is in neither list below, or "IntlChar" is not flagged, the code point is
   * looked up in the built-in right-to-left tables.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of the range covered by the tables is left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // single right-to-left code points (matched strictly)
    static $RTL_SINGLES = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    // inclusive right-to-left code point ranges: [first, last]
    static $RTL_RANGES = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    if (\in_array($c, $RTL_SINGLES, true)) {
      return 'RTL';
    }

    foreach ($RTL_RANGES as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1966
1967
  /**
   * Loads a data table from "/data/*.php" (relative to this file's directory).
   *
   * @param string $file The file name without directory and without the ".php" extension.
   *
   * @return mixed|false The value returned by the included file, or false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1984
1985
  /**
   * Returns information about the detected php-support.
   *
   * @param string|null $key
   *
   * @return mixed
   *               The full support-"array", if $key === null<br>
   *               the stored value, if $key is used and set<br>
   *               otherwise <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    // make sure the support information was collected
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    return $key === null
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
2011
2012
  /**
   * Guesses the file type from the first two bytes ("magic number") of the given data.
   *
   * The raw two-byte prefix is compared directly. Concatenating the two decimal byte
   * values into one number is ambiguous: bytes 0x07 0xAD give "7173", the same as "GI".
   *
   * @param string $str      The (binary) data to inspect.
   * @param array  $fallback with this keys: 'ext', 'mime', 'type'
   *
   * @return array
   *               with this keys: 'ext', 'mime', 'type'<br>
   *               The $fallback is returned for empty input, input shorter than two bytes
   *               and unknown signatures.
   */
  private static function get_file_type(
      string $str,
      array $fallback = [
          'ext'  => null,
          'mime' => 'application/octet-stream',
          'type' => null,
      ]
  ): array
  {
    if ($str === '') {
      return $fallback;
    }

    $magic = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($magic) !== 2) {
      return $fallback;
    }

    // raw two-byte signature => [extension, mime type]
    static $SIGNATURES = [
        '%P'       => ['pdf', 'application/pdf'],          // 37, 80
        'MZ'       => ['exe', 'application/octet-stream'], // 77, 90
        'MT'       => ['midi', 'audio/x-midi'],            // 77, 84
        'PK'       => ['zip', 'application/zip'],          // 80, 75
        'Ra'       => ['rar', 'application/rar'],          // 82, 97
        "\xFF\xD8" => ['jpg', 'image/jpeg'],               // 255, 216
        'GI'       => ['gif', 'image/gif'],                // 71, 73
        'BM'       => ['bmp', 'image/bmp'],                // 66, 77
        "\x89P"    => ['png', 'image/png'],                // 137, 80
    ];

    if (!isset($SIGNATURES[$magic])) {
      return $fallback;
    }

    return [
        'ext'  => $SIGNATURES[$magic][0],
        'mime' => $SIGNATURES[$magic][1],
        'type' => 'binary',
    ];
  }
2108
2109
  /**
   * Builds a random string out of the given pool of characters.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   *                An empty string if $possibleChars contains no characters.
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);
    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    // pick one random character from the pool per iteration
    for ($picked = 0; $picked < $length; ++$picked) {
      try {
        $index = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // fall back to "mt_rand()" when "random_int()" throws
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
2142
2143
  /**
   * Creates a unique identifier string.
   *
   * The seed is built from "mt_rand()", the session id, the remote / server address
   * (if set in $_SERVER) and the extra entropy, and is then passed to "uniqid()".
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $remoteAddress = $_SERVER['REMOTE_ADDR'] ?? '';
    $serverAddress = $_SERVER['SERVER_ADDR'] ?? '';

    $seed = \mt_rand() . \session_id() . $remoteAddress . $serverAddress . $entropyExtra;

    $unique = \uniqid($seed, true);

    return $md5 ? \md5($unique . $seed) : $unique;
  }
2165
2166
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool The result of "UTF8::string_has_bom()" for the given string.
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2181
2182
  /**
   * Tells whether the string contains at least one lower case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool true if a lower case character was found, false otherwise.
   */
  public static function has_lowercase(string $str): bool
  {
    $lowercasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowercasePattern);
  }
2193
2194
  /**
   * Tells whether the string contains at least one upper case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool true if an upper case character was found, false otherwise.
   */
  public static function has_uppercase(string $str): bool
  {
    $uppercasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $uppercasePattern);
  }
2205
2206
  /**
   * Turns a hexadecimal value into the matching UTF-8 character.
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    // hexadecimal string => decimal code point
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2217
2218
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+".
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Only real hexadecimal digits are allowed: with "[a-z0-9]" an input like
    // "zzzz" matched and was silently converted to 0 by intval().
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match) === 1) {
      return \intval($match[1], 16);
    }

    return false;
  }
2242
2243
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string The result of "UTF8::html_entity_decode()" for the same arguments.
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2258
2259
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // start at 0x80 to leave the ASCII range untouched
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // The conversion map consists of groups of exactly four integers
      // (start, end, offset, mask). A trailing fifth element is ignored by
      // PHP 7 but raises a ValueError on PHP 8.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            static function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2312
2313
  /**
2314
   * UTF-8 version of html_entity_decode()
2315
   *
2316
   * The reason we are not using html_entity_decode() by itself is because
2317
   * while it is not technically correct to leave out the semicolon
2318
   * at the end of an entity most browsers will still interpret the entity
2319
   * correctly. html_entity_decode() does not convert entities without
2320
   * semicolons, so we are left with our own little solution here. Bummer.
2321
   *
2322
   * Convert all HTML entities to their applicable characters
2323
   *
2324
   * INFO: opposite to UTF8::html_encode()
2325
   *
2326
   * @link http://php.net/manual/en/function.html-entity-decode.php
2327
   *
2328
   * @param string $str      <p>
2329
   *                         The input string.
2330
   *                         </p>
2331
   * @param int    $flags    [optional] <p>
2332
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2333
   *                         which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
2334
   *                         <table>
2335
   *                         Available <i>flags</i> constants
2336
   *                         <tr valign="top">
2337
   *                         <td>Constant Name</td>
2338
   *                         <td>Description</td>
2339
   *                         </tr>
2340
   *                         <tr valign="top">
2341
   *                         <td><b>ENT_COMPAT</b></td>
2342
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2343
   *                         </tr>
2344
   *                         <tr valign="top">
2345
   *                         <td><b>ENT_QUOTES</b></td>
2346
   *                         <td>Will convert both double and single quotes.</td>
2347
   *                         </tr>
2348
   *                         <tr valign="top">
2349
   *                         <td><b>ENT_NOQUOTES</b></td>
2350
   *                         <td>Will leave both double and single quotes unconverted.</td>
2351
   *                         </tr>
2352
   *                         <tr valign="top">
2353
   *                         <td><b>ENT_HTML401</b></td>
2354
   *                         <td>
2355
   *                         Handle code as HTML 4.01.
2356
   *                         </td>
2357
   *                         </tr>
2358
   *                         <tr valign="top">
2359
   *                         <td><b>ENT_XML1</b></td>
2360
   *                         <td>
2361
   *                         Handle code as XML 1.
2362
   *                         </td>
2363
   *                         </tr>
2364
   *                         <tr valign="top">
2365
   *                         <td><b>ENT_XHTML</b></td>
2366
   *                         <td>
2367
   *                         Handle code as XHTML.
2368
   *                         </td>
2369
   *                         </tr>
2370
   *                         <tr valign="top">
2371
   *                         <td><b>ENT_HTML5</b></td>
2372
   *                         <td>
2373
   *                         Handle code as HTML 5.
2374
   *                         </td>
2375
   *                         </tr>
2376
   *                         </table>
2377
   *                         </p>
2378
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2379
   *
2380
   * @return string The decoded string.
2381
   */
2382 40
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // too short to contain an entity that needs decoding
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // quick exit: no "&" at all, or neither a numeric entity nor a terminating ";"
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // The support information must be available before "mbstring" is inspected
    // below; previously the key was read before this check could populate it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // repeat until a pass no longer changes the string (handles nested encodings)
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        // The conversion map consists of groups of exactly four integers
        // (start, end, offset, mask); a fifth element raises a ValueError on PHP 8.
        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // keep quotes encoded here; they are handled via $flags below
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // add the missing ";" to numeric entities, so that html_entity_decode() can handle them
      $strWithSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      if ($strWithSemicolons === null) {
        // preg_replace() failed: continue with the unrepaired string instead
        // of passing null into html_entity_decode()
        $strWithSemicolons = $str;
      }

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $strWithSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2470
2471
  /**
   * Escapes a string for HTML output via "UTF8::htmlspecialchars()",
   * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2487
2488
  /**
   * Removes html-tags that are empty or contain only whitespace.
   *
   * e.g.: <tag></tag>
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    // an opening tag, optional whitespace, then a closing tag
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2505
2506
  /**
2507
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2508
   *
2509
   * @link http://php.net/manual/en/function.htmlentities.php
2510
   *
2511
   * @param string $str           <p>
2512
   *                              The input string.
2513
   *                              </p>
2514
   * @param int    $flags         [optional] <p>
2515
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2516
   *                              invalid code unit sequences and the used document type. The default is
2517
   *                              ENT_COMPAT | ENT_HTML401.
2518
   *                              <table>
2519
   *                              Available <i>flags</i> constants
2520
   *                              <tr valign="top">
2521
   *                              <td>Constant Name</td>
2522
   *                              <td>Description</td>
2523
   *                              </tr>
2524
   *                              <tr valign="top">
2525
   *                              <td><b>ENT_COMPAT</b></td>
2526
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2527
   *                              </tr>
2528
   *                              <tr valign="top">
2529
   *                              <td><b>ENT_QUOTES</b></td>
2530
   *                              <td>Will convert both double and single quotes.</td>
2531
   *                              </tr>
2532
   *                              <tr valign="top">
2533
   *                              <td><b>ENT_NOQUOTES</b></td>
2534
   *                              <td>Will leave both double and single quotes unconverted.</td>
2535
   *                              </tr>
2536
   *                              <tr valign="top">
2537
   *                              <td><b>ENT_IGNORE</b></td>
2538
   *                              <td>
2539
   *                              Silently discard invalid code unit sequences instead of returning
2540
   *                              an empty string. Using this flag is discouraged as it
2541
   *                              may have security implications.
2542
   *                              </td>
2543
   *                              </tr>
2544
   *                              <tr valign="top">
2545
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2546
   *                              <td>
2547
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2548
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2549
   *                              </td>
2550
   *                              </tr>
2551
   *                              <tr valign="top">
2552
   *                              <td><b>ENT_DISALLOWED</b></td>
2553
   *                              <td>
2554
   *                              Replace invalid code points for the given document type with a
2555
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2556
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2557
   *                              instance, to ensure the well-formedness of XML documents with
2558
   *                              embedded external content.
2559
   *                              </td>
2560
   *                              </tr>
2561
   *                              <tr valign="top">
2562
   *                              <td><b>ENT_HTML401</b></td>
2563
   *                              <td>
2564
   *                              Handle code as HTML 4.01.
2565
   *                              </td>
2566
   *                              </tr>
2567
   *                              <tr valign="top">
2568
   *                              <td><b>ENT_XML1</b></td>
2569
   *                              <td>
2570
   *                              Handle code as XML 1.
2571
   *                              </td>
2572
   *                              </tr>
2573
   *                              <tr valign="top">
2574
   *                              <td><b>ENT_XHTML</b></td>
2575
   *                              <td>
2576
   *                              Handle code as XHTML.
2577
   *                              </td>
2578
   *                              </tr>
2579
   *                              <tr valign="top">
2580
   *                              <td><b>ENT_HTML5</b></td>
2581
   *                              <td>
2582
   *                              Handle code as HTML 5.
2583
   *                              </td>
2584
   *                              </tr>
2585
   *                              </table>
2586
   *                              </p>
2587
   * @param string $encoding      [optional] <p>
2588
   *                              Like <b>htmlspecialchars</b>,
2589
   *                              <b>htmlentities</b> takes an optional third argument
2590
   *                              <i>encoding</i> which defines encoding used in
2591
   *                              conversion.
2592
   *                              Although this argument is technically optional, you are highly
2593
   *                              encouraged to specify the correct value for your code.
2594
   *                              </p>
2595
   * @param bool   $double_encode [optional] <p>
2596
   *                              When <i>double_encode</i> is turned off PHP will not
2597
   *                              encode existing html entities. The default is to convert everything.
2598
   *                              </p>
2599
   *
2600
   *
2601
   * @return string The encoded string.
2602
   * </p>
2603
   * <p>
2604
   * If the input <i>string</i> contains an invalid code unit
2605
   * sequence within the given <i>encoding</i> an empty string
2606
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2607
   * <b>ENT_SUBSTITUTE</b> flags are set.
2608
   */
2609 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used exactly as given; any other encoding name is
    // run through normalize_encoding() first.
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The native htmlentities() does not touch backslashes, so every backslash
    // is additionally replaced by its numeric entity "&#92;".
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The result is finally handed over to html_encode().
    return self::html_encode($encoded, true, $encoding);
  }
2629
2630
  /**
2631
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2632
   *
2633
   * INFO: Take a look at "UTF8::htmlentities()"
2634
   *
2635
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2636
   *
2637
   * @param string $str           <p>
2638
   *                              The string being converted.
2639
   *                              </p>
2640
   * @param int    $flags         [optional] <p>
2641
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2642
   *                              invalid code unit sequences and the used document type. The default is
2643
   *                              ENT_COMPAT | ENT_HTML401.
2644
   *                              <table>
2645
   *                              Available <i>flags</i> constants
2646
   *                              <tr valign="top">
2647
   *                              <td>Constant Name</td>
2648
   *                              <td>Description</td>
2649
   *                              </tr>
2650
   *                              <tr valign="top">
2651
   *                              <td><b>ENT_COMPAT</b></td>
2652
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2653
   *                              </tr>
2654
   *                              <tr valign="top">
2655
   *                              <td><b>ENT_QUOTES</b></td>
2656
   *                              <td>Will convert both double and single quotes.</td>
2657
   *                              </tr>
2658
   *                              <tr valign="top">
2659
   *                              <td><b>ENT_NOQUOTES</b></td>
2660
   *                              <td>Will leave both double and single quotes unconverted.</td>
2661
   *                              </tr>
2662
   *                              <tr valign="top">
2663
   *                              <td><b>ENT_IGNORE</b></td>
2664
   *                              <td>
2665
   *                              Silently discard invalid code unit sequences instead of returning
2666
   *                              an empty string. Using this flag is discouraged as it
2667
   *                              may have security implications.
2668
   *                              </td>
2669
   *                              </tr>
2670
   *                              <tr valign="top">
2671
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2672
   *                              <td>
2673
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2674
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2675
   *                              </td>
2676
   *                              </tr>
2677
   *                              <tr valign="top">
2678
   *                              <td><b>ENT_DISALLOWED</b></td>
2679
   *                              <td>
2680
   *                              Replace invalid code points for the given document type with a
2681
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2682
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2683
   *                              instance, to ensure the well-formedness of XML documents with
2684
   *                              embedded external content.
2685
   *                              </td>
2686
   *                              </tr>
2687
   *                              <tr valign="top">
2688
   *                              <td><b>ENT_HTML401</b></td>
2689
   *                              <td>
2690
   *                              Handle code as HTML 4.01.
2691
   *                              </td>
2692
   *                              </tr>
2693
   *                              <tr valign="top">
2694
   *                              <td><b>ENT_XML1</b></td>
2695
   *                              <td>
2696
   *                              Handle code as XML 1.
2697
   *                              </td>
2698
   *                              </tr>
2699
   *                              <tr valign="top">
2700
   *                              <td><b>ENT_XHTML</b></td>
2701
   *                              <td>
2702
   *                              Handle code as XHTML.
2703
   *                              </td>
2704
   *                              </tr>
2705
   *                              <tr valign="top">
2706
   *                              <td><b>ENT_HTML5</b></td>
2707
   *                              <td>
2708
   *                              Handle code as HTML 5.
2709
   *                              </td>
2710
   *                              </tr>
2711
   *                              </table>
2712
   *                              </p>
2713
   * @param string $encoding      [optional] <p>
2714
   *                              Defines encoding used in conversion.
2715
   *                              </p>
2716
   *                              <p>
2717
   *                              For the purposes of this function, the encodings
2718
   *                              ISO-8859-1, ISO-8859-15,
2719
   *                              UTF-8, cp866,
2720
   *                              cp1251, cp1252, and
2721
   *                              KOI8-R are effectively equivalent, provided the
2722
   *                              <i>string</i> itself is valid for the encoding, as
2723
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2724
   *                              the same positions in all of these encodings.
2725
   *                              </p>
2726
   * @param bool   $double_encode [optional] <p>
2727
   *                              When <i>double_encode</i> is turned off PHP will not
2728
   *                              encode existing html entities, the default is to convert everything.
2729
   *                              </p>
2730
   *
2731
   * @return string The converted string.
2732
   * </p>
2733
   * <p>
2734
   * If the input <i>string</i> contains an invalid code unit
2735
   * sequence within the given <i>encoding</i> an empty string
2736
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2737
   * <b>ENT_SUBSTITUTE</b> flags are set.
2738
   */
2739 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used exactly as given; any other encoding name is
    // run through normalize_encoding() first.
    $needsNormalization = \in_array($encoding, ['UTF-8', 'CP850'], true) === false;
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The actual conversion is delegated to the native function.
    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2747
2748
  /**
2749
   * Checks whether iconv is available on the server.
2750
   *
2751
   * @return bool
2752
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2753
   */
2754
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so it is returned directly.
    $isLoaded = \extension_loaded('iconv');

    return $isLoaded;
  }
2758
2759
  /**
2760
   * alias for "UTF8::decimal_to_chr()"
2761
   *
2762
   * @see UTF8::decimal_to_chr()
2763
   *
2764
   * @param mixed $int
2765
   *
2766
   * @return string
2767
   */
2768 4
  public static function int_to_chr($int): string
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2772
2773
  /**
2774
   * Converts Integer to hexadecimal U+xxxx code point representation.
2775
   *
2776
   * INFO: opposite to UTF8::hex_to_int()
2777
   *
2778
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2779
   * @param string $pfix [optional]
2780
   *
2781
   * @return string The code point, or empty string on failure.
2782
   */
2783 6
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad the (lower-case) hex digits with zeros up to a minimum width of
    // four; values that are already four or more digits long are kept as they are.
    $paddedHex = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $paddedHex;
  }
2791
2792
  /**
2793
   * Checks whether intl-char is available on the server.
2794
   *
2795
   * @return bool
2796
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2797
   */
2798
  public static function intlChar_loaded(): bool
  {
    // Availability is detected by looking for the "IntlChar" class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2802
2803
  /**
2804
   * Checks whether intl is available on the server.
2805
   *
2806
   * @return bool
2807
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2808
   */
2809 5
  public static function intl_loaded(): bool
  {
    // Availability is detected by asking PHP for the "intl" extension.
    $isLoaded = \extension_loaded('intl');

    return $isLoaded;
  }
2813
2814
  /**
2815
   * alias for "UTF8::is_ascii()"
2816
   *
2817
   * @see        UTF8::is_ascii()
2818
   *
2819
   * @param string $str
2820
   *
2821
   * @return bool
2822
   *
2823
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2824
   */
2825 2
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias: the check itself lives in is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2829
2830
  /**
2831
   * alias for "UTF8::is_base64()"
2832
   *
2833
   * @see        UTF8::is_base64()
2834
   *
2835
   * @param string $str
2836
   *
2837
   * @return bool
2838
   *
2839
   * @deprecated <p>use "UTF8::is_base64()"</p>
2840
   */
2841 2
  public static function isBase64($str): bool
  {
    // Deprecated camelCase alias: the check itself lives in is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2845
2846
  /**
2847
   * alias for "UTF8::is_binary()"
2848
   *
2849
   * @see        UTF8::is_binary()
2850
   *
2851
   * @param mixed $str
2852
   * @param bool  $strict
2853
   *
2854
   * @return bool
2855
   *
2856
   * @deprecated <p>use "UTF8::is_binary()"</p>
2857
   */
2858 4
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded to is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
  }
2862
2863
  /**
2864
   * alias for "UTF8::is_bom()"
2865
   *
2866
   * @see        UTF8::is_bom()
2867
   *
2868
   * @param string $utf8_chr
2869
   *
2870
   * @return bool
2871
   *
2872
   * @deprecated <p>use "UTF8::is_bom()"</p>
2873
   */
2874 2
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias: the check itself lives in is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2878
2879
  /**
2880
   * alias for "UTF8::is_html()"
2881
   *
2882
   * @see        UTF8::is_html()
2883
   *
2884
   * @param string $str
2885
   *
2886
   * @return bool
2887
   *
2888
   * @deprecated <p>use "UTF8::is_html()"</p>
2889
   */
2890 2
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias: the check itself lives in is_html().
    $result = self::is_html($str);

    return $result;
  }
2894
2895
  /**
2896
   * alias for "UTF8::is_json()"
2897
   *
2898
   * @see        UTF8::is_json()
2899
   *
2900
   * @param string $str
2901
   *
2902
   * @return bool
2903
   *
2904
   * @deprecated <p>use "UTF8::is_json()"</p>
2905
   */
2906
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias: the check itself lives in is_json().
    $result = self::is_json($str);

    return $result;
  }
2910
2911
  /**
2912
   * alias for "UTF8::is_utf16()"
2913
   *
2914
   * @see        UTF8::is_utf16()
2915
   *
2916
   * @param mixed $str
2917
   *
2918
   * @return int|false
2919
   *                    <strong>false</strong> if it's not UTF-16,<br>
2920
   *                    <strong>1</strong> for UTF-16LE,<br>
2921
   *                    <strong>2</strong> for UTF-16BE.
2922
   *
2923
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2924
   */
2925 2
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: whatever is_utf16() yields is passed back unchanged.
    $result = self::is_utf16($str);

    return $result;
  }
2929
2930
  /**
2931
   * alias for "UTF8::is_utf32()"
2932
   *
2933
   * @see        UTF8::is_utf32()
2934
   *
2935
   * @param mixed $str
2936
   *
2937
   * @return int|false
2938
   *                   <strong>false</strong> if it's not UTF-32,
2939
   *                   <strong>1</strong> for UTF-32LE,
2940
   *                   <strong>2</strong> for UTF-32BE.
2941
   *
2942
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2943
   */
2944 2
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: whatever is_utf32() yields is passed back unchanged.
    $result = self::is_utf32($str);

    return $result;
  }
2948
2949
  /**
2950
   * alias for "UTF8::is_utf8()"
2951
   *
2952
   * @see        UTF8::is_utf8()
2953
   *
2954
   * @param string $str
2955
   * @param bool   $strict
2956
   *
2957
   * @return bool
2958
   *
2959
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2960
   */
2961 17
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2965
2966
  /**
2967
   * Returns true if the string contains only alphabetic chars, false otherwise.
2968
   *
2969
   * @param string $str
2970
   *
2971
   * @return bool
2972
   *               Whether or not $str contains only alphabetic chars.
2973
   */
2974 10
  public static function is_alpha(string $str): bool
  {
    // POSIX class [[:alpha:]], anchored on both sides; the "*" quantifier means
    // the pattern itself also allows zero characters.
    $onlyAlphaPattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $onlyAlphaPattern);
  }
2978
2979
  /**
2980
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2981
   *
2982
   * @param string $str
2983
   *
2984
   * @return bool
2985
   *               Whether or not $str contains only alphanumeric chars.
2986
   */
2987 13
  public static function is_alphanumeric(string $str): bool
  {
    // POSIX class [[:alnum:]], anchored on both sides; the "*" quantifier means
    // the pattern itself also allows zero characters.
    $onlyAlnumPattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $onlyAlnumPattern);
  }
2991
2992
  /**
2993
   * Checks if a string is 7 bit ASCII.
2994
   *
2995
   * @param string $str <p>The string to check.</p>
2996
   *
2997
   * @return bool
2998
   *              <strong>true</strong> if it is ASCII<br>
2999
   *              <strong>false</strong> otherwise
3000
   *
3001
   */
3002 202
  public static function is_ascii(string $str): bool
  {
    // An empty string contains no offending byte.
    if ($str === '') {
      return true;
    }

    // Search for a single byte outside the accepted set: \x09, \x10, \x13, \x0A,
    // \x0D and the printable range \x20-\x7E.
    // NOTE(review): \x10 and \x13 are hexadecimal (DLE / DC3); they look like they
    // were meant as decimal 10 / 13 (LF / CR), which are already covered by
    // \x0A / \x0D - confirm before changing, callers may rely on it.
    $foundOffendingByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOffendingByte;
  }
3010
3011
  /**
3012
   * Returns true if the string is base64 encoded, false otherwise.
3013
   *
3014
   * @param string $str <p>The input string.</p>
3015
   *
3016
   * @return bool Whether or not $str is base64 encoded.
3017
   */
3018 9
  public static function is_base64($str): bool
  {
    // Only a non-empty string can be base64 encoded data; the parameter is
    // deliberately untyped, so other types are rejected here instead of raising.
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false as soon as the input contains
    // a character from outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // The decoded payload is compared against false explicitly (above) instead of
    // by truthiness: a payload of "0" (base64 "MA==") is falsy in PHP and was
    // wrongly rejected before.
    //
    // Re-encoding must reproduce the input exactly, which rejects non-canonical
    // input (missing padding, embedded whitespace, ...).
    return \base64_encode($decoded) === $str;
  }
3032
3033
  /**
3034
   * Check if the input is binary (heuristic; this looks like a hack).
3035
   *
3036
   * @param mixed $input
3037
   * @param bool  $strict
3038
   *
3039
   * @return bool
3040
   */
3041 40
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // A string made up of nothing but "0" and "1" is treated as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Ask get_file_type() whether it classifies the data as "binary".
    $fileType = self::get_file_type($input);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    // Ratio check: more than 25.6 % NUL bytes counts as binary.
    $byteLength = self::strlen_in_byte($input);
    if ($byteLength) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);
      if (($nullByteCount / $byteLength) > 0.256) {
        return true;
      }
    }

    // Everything below only runs in strict mode.
    if ($strict !== true) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    // Last resort: let ext-fileinfo determine the MIME encoding of the buffer.
    /** @noinspection PhpComposerExtensionStubsInspection */
    $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($input);

    return $detectedEncoding === 'binary';
  }
3090
3091
  /**
3092
   * Check if the file is binary.
3093
   *
3094
   * @param string $file
3095
   *
3096
   * @return bool
3097
   */
3098 6
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');
    if (\is_resource($handle) === false) {
      // Nothing could be read, so there is nothing to classify.
      return false;
    }

    // Only the first 512 bytes are inspected.
    $head = \fread($handle, 512);
    \fclose($handle);

    if ($head === '') {
      return false;
    }

    return self::is_binary($head, true);
  }
3115
3116
  /**
3117
   * Returns true if the string contains only whitespace chars, false otherwise.
3118
   *
3119
   * @param string $str
3120
   *
3121
   * @return bool
3122
   *               Whether or not $str contains only whitespace characters.
3123
   */
3124 15
  public static function is_blank(string $str): bool
  {
    // POSIX class [[:space:]], anchored on both sides; the "*" quantifier means
    // the pattern itself also allows zero characters.
    $onlyWhitespacePattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $onlyWhitespacePattern);
  }
3128
3129
  /**
3130
   * Checks if the given string is equal to any "Byte Order Mark".
3131
   *
3132
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3133
   *
3134
   * @param string $str <p>The input string.</p>
3135
   *
3136
   * @return bool
3137
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
3138
   */
3139 2
  public static function is_bom($str): bool
  {
    // The BOM byte sequences are the keys of self::$BOM (the values, the byte
    // lengths, are not needed here). The comparison is strict, so only an
    // identical value matches.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3149
3150
  /**
3151
   * Determine whether the string is considered to be empty.
3152
   *
3153
   * A variable is considered empty if it does not exist or if its value equals FALSE.
3154
   * empty() does not generate a warning if the variable does not exist.
3155
   *
3156
   * @param mixed $str
3157
   *
3158
   * @return bool Whether or not $str is empty().
3159
   */
3160
  public static function is_empty($str): bool
  {
    // For a variable that is known to exist, empty() is the same as "converts to
    // boolean false" - so "", "0", 0, 0.0, null, false and [] all count as empty.
    $isFalsy = (bool)$str === false;

    return $isFalsy;
  }
3164
3165
  /**
3166
   * Returns true if the string contains only hexadecimal chars, false otherwise.
3167
   *
3168
   * @param string $str
3169
   *
3170
   * @return bool
3171
   *               Whether or not $str contains only hexadecimal chars.
3172
   */
3173 13
  public static function is_hexadecimal(string $str): bool
  {
    // POSIX class [[:xdigit:]], anchored on both sides; the "*" quantifier means
    // the pattern itself also allows zero characters.
    $onlyHexPattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $onlyHexPattern);
  }
3177
3178
  /**
3179
   * Check if the string contains any html-tags <lall>.
3180
   *
3181
   * @param string $str <p>The input string.</p>
3182
   *
3183
   * @return bool
3184
   */
3185 3
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // A single opening, closing or self-closing tag (optionally carrying
    // attributes) anywhere in the string is enough.
    $tagFound = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    // preg_match() yields 1 on a hit, 0 on no hit and false on error.
    return $tagFound === 1;
  }
3198
3199
  /**
3200
   * Try to check if "$str" is an json-string.
3201
   *
3202
   * @param string $str <p>The input string.</p>
3203
   *
3204
   * @return bool
3205
   */
3206 22
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only a decoded object or array counts; a decoded scalar or null is rejected.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    // ... and the decoder must not have recorded an error.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3231
3232
  /**
3233
   * @param string $str
3234
   *
3235
   * @return bool
3236
   */
3237 8
  public static function is_lowercase(string $str): bool
  {
    // POSIX class [[:lower:]], anchored on both sides; the "*" quantifier means
    // the pattern itself also allows zero characters.
    $onlyLowerPattern = '^[[:lower:]]*$';

    return (bool)self::str_matches_pattern($str, $onlyLowerPattern);
  }
3245
3246
  /**
3247
   * Returns true if the string is serialized, false otherwise.
3248
   *
3249
   * @param string $str
3250
   *
3251
   * @return bool Whether or not $str is serialized.
3252
   */
3253 7
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() signals failure with false, which is also the decoded value
    // of a serialized boolean false - that one payload is matched literally.
    if ($str === 'b:0;') {
      return true;
    }

    // This method only needs to know whether the payload parses. With
    // "allowed_classes => false" objects are restored as __PHP_Incomplete_Class,
    // so no class from the payload is instantiated and no __wakeup() /
    // __destruct() code runs for input that is merely being probed.
    //
    // The silence operator stays: unserialize() raises a notice/warning for
    // every string that is not serialized data, which is the expected case here.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3265
3266
  /**
3267
   * Returns true if the string contains only upper case chars, false
3268
   * otherwise.
3269
   *
3270
   * @param string $str <p>The input string.</p>
3271
   *
3272
   * @return bool
3273
   *               Whether or not $str contains only upper case characters.
3274
   */
3275 8
  public static function is_uppercase(string $str): bool
  {
    // POSIX class [[:upper:]], anchored on both sides; the "*" quantifier means
    // the pattern itself also allows zero characters.
    $onlyUpperPattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $onlyUpperPattern);
  }
3279
3280
  /**
3281
   * Check if the string is UTF-16.
3282
   *
3283
   * @param mixed $str <p>The input string.</p>
3284
   * @param bool  $checkIfStringIsBinary
3285
   *
3286
   * @return int|false
3287
   *                   <strong>false</strong> if it's not UTF-16,<br>
3288
   *                   <strong>1</strong> for UTF-16LE,<br>
3289
   *                   <strong>2</strong> for UTF-16BE.
3290
   */
3291 21
  public static function is_utf16($str, $checkIfStringIsBinary = true)
  {
    $str = (string)$str;

    // Optional pre-check: input that is_binary() (strict mode) does not report
    // as binary is rejected right away.
    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
      return false;
    }

    // Populate the support table before reading from it, like the other methods
    // of this class do. Without this guard a direct call with
    // $checkIfStringIsBinary = false could read self::$SUPPORT before it was filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Lazily computed once and shared by both byte-order candidates.
    $strChars = [];

    // Score each byte order: the input is converted to UTF-8, back to the
    // candidate encoding and to UTF-8 again. Only when that round trip is stable
    // are the resulting characters compared against the input's character table.
    $scores = [];
    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): the array *key* of the round-trip table is searched among
      // the *values* of $strChars - kept exactly as it was; confirm against
      // count_chars() whether a key lookup was intended.
      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (\in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0 : 0) means "not UTF-16"; otherwise the higher score
    // wins: 1 for little endian, 2 for big endian.
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3355
3356
  /**
3357
   * Check if the string is UTF-32.
3358
   *
3359
   * @param mixed $str <p>The input string.</p>
3360
   * @param bool  $checkIfStringIsBinary
3361
   *
3362
   * @return int|false
3363
   *                   <strong>false</strong> if it's not UTF-32,<br>
3364
   *                   <strong>1</strong> for UTF-32LE,<br>
3365
   *                   <strong>2</strong> for UTF-32BE.
3366
   */
3367 17
  public static function is_utf32($str, $checkIfStringIsBinary = true)
  {
    $str = (string)$str;

    // Optional pre-check: input that is_binary() (strict mode) does not report
    // as binary is rejected right away.
    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
      return false;
    }

    // Populate the support table before reading from it, like the other methods
    // of this class do. Without this guard a direct call with
    // $checkIfStringIsBinary = false could read self::$SUPPORT before it was filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Lazily computed once and shared by both byte-order candidates.
    $strChars = [];

    // Score each byte order: the input is converted to UTF-8, back to the
    // candidate encoding and to UTF-8 again. Only when that round trip is stable
    // are the resulting characters compared against the input's character table.
    $scores = [];
    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): the array *key* of the round-trip table is searched among
      // the *values* of $strChars - kept exactly as it was; confirm against
      // count_chars() whether a key lookup was intended.
      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (\in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0 : 0) means "not UTF-32"; otherwise the higher score
    // wins: 1 for little endian, 2 for big endian.
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3431
3432
  /**
3433
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3434
   *
3435
   * @see    http://hsivonen.iki.fi/php-utf8/
3436
   *
3437
   * @param string|string[] $str    <p>The string to be checked.</p>
3438
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3439
   *
3440
   * @return bool
3441
   */
3442 107
  public static function is_utf8($str, bool $strict = false): bool
3443
  {
3444 107
    if (\is_array($str) === true) {
3445 2
      foreach ($str as $k => $v) {
3446 2
        if (false === self::is_utf8($v, $strict)) {
3447 2
          return false;
3448
        }
3449
      }
3450
3451
      return true;
3452
    }
3453
3454 107
    if ('' === $str) {
3455 12
      return true;
3456
    }
3457
3458 103
    if ($strict === true) {
3459 2
      $isBinary = self::is_binary($str, true);
3460
3461 2
      if ($isBinary && self::is_utf16($str, false) !== false) {
3462 2
        return false;
3463
      }
3464
3465
      if ($isBinary && self::is_utf32($str, false) !== false) {
3466
        return false;
3467
      }
3468
    }
3469
3470 103
    if (self::pcre_utf8_support() !== true) {
3471
3472
      // If even just the first character can be matched, when the /u
3473
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3474
      // invalid, nothing at all will match, even if the string contains
3475
      // some valid sequences
3476
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3477
    }
3478
3479 103
    $mState = 0; // cached expected number of octets after the current octet
3480
    // until the beginning of the next UTF8 character sequence
3481 103
    $mUcs4 = 0; // cached Unicode character
3482 103
    $mBytes = 1; // cached expected number of octets in the current sequence
3483
3484 103
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3485
      self::checkForSupport();
3486
    }
3487
3488 103
    if (self::$ORD === null) {
3489
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3490
    }
3491
3492 103
    $len = self::strlen_in_byte((string)$str);
3493
    /** @noinspection ForeachInvariantsInspection */
3494 103
    for ($i = 0; $i < $len; $i++) {
3495 103
      $in = self::$ORD[$str[$i]];
3496 103
      if ($mState === 0) {
3497
        // When mState is zero we expect either a US-ASCII character or a
3498
        // multi-octet sequence.
3499 103
        if (0 === (0x80 & $in)) {
3500
          // US-ASCII, pass straight through.
3501 98
          $mBytes = 1;
3502 84
        } elseif (0xC0 === (0xE0 & $in)) {
3503
          // First octet of 2 octet sequence.
3504 75
          $mUcs4 = $in;
3505 75
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3506 75
          $mState = 1;
3507 75
          $mBytes = 2;
3508 58
        } elseif (0xE0 === (0xF0 & $in)) {
3509
          // First octet of 3 octet sequence.
3510 41
          $mUcs4 = $in;
3511 41
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3512 41
          $mState = 2;
3513 41
          $mBytes = 3;
3514 30
        } elseif (0xF0 === (0xF8 & $in)) {
3515
          // First octet of 4 octet sequence.
3516 19
          $mUcs4 = $in;
3517 19
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3518 19
          $mState = 3;
3519 19
          $mBytes = 4;
3520 13
        } elseif (0xF8 === (0xFC & $in)) {
3521
          /* First octet of 5 octet sequence.
3522
          *
3523
          * This is illegal because the encoded codepoint must be either
3524
          * (a) not the shortest form or
3525
          * (b) outside the Unicode range of 0-0x10FFFF.
3526
          * Rather than trying to resynchronize, we will carry on until the end
3527
          * of the sequence and let the later error handling code catch it.
3528
          */
3529 5
          $mUcs4 = $in;
3530 5
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3531 5
          $mState = 4;
3532 5
          $mBytes = 5;
3533 10
        } elseif (0xFC === (0xFE & $in)) {
3534
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3535 5
          $mUcs4 = $in;
3536 5
          $mUcs4 = ($mUcs4 & 1) << 30;
3537 5
          $mState = 5;
3538 5
          $mBytes = 6;
3539
        } else {
3540
          // Current octet is neither in the US-ASCII range nor a legal first
3541
          // octet of a multi-octet sequence.
3542 103
          return false;
3543
        }
3544
      } else {
3545
        // When mState is non-zero, we expect a continuation of the multi-octet
3546
        // sequence
3547 84
        if (0x80 === (0xC0 & $in)) {
3548
          // Legal continuation.
3549 76
          $shift = ($mState - 1) * 6;
3550 76
          $tmp = $in;
3551 76
          $tmp = ($tmp & 0x0000003F) << $shift;
3552 76
          $mUcs4 |= $tmp;
3553
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3554
          // Unicode code point to be output.
3555 76
          if (0 === --$mState) {
3556
            // Check for illegal sequences and code points.
3557
            //
3558
            // From Unicode 3.1, non-shortest form is illegal
3559
            if (
3560 76
                (2 === $mBytes && $mUcs4 < 0x0080)
3561
                ||
3562 76
                (3 === $mBytes && $mUcs4 < 0x0800)
3563
                ||
3564 76
                (4 === $mBytes && $mUcs4 < 0x10000)
3565
                ||
3566 76
                (4 < $mBytes)
3567
                ||
3568
                // From Unicode 3.2, surrogate characters are illegal.
3569 76
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3570
                ||
3571
                // Code points outside the Unicode range are illegal.
3572 76
                ($mUcs4 > 0x10FFFF)
3573
            ) {
3574 8
              return false;
3575
            }
3576
            // initialize UTF8 cache
3577 76
            $mState = 0;
3578 76
            $mUcs4 = 0;
3579 76
            $mBytes = 1;
3580
          }
3581
        } else {
3582
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3583
          // Incomplete multi-octet sequence.
3584 36
          return false;
3585
        }
3586
      }
3587
    }
3588
3589 67
    return true;
3590
  }
3591
3592
  /**
3593
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3594
   * Decodes a JSON string
3595
   *
3596
   * @link http://php.net/manual/en/function.json-decode.php
3597
   *
3598
   * @param string $json    <p>
3599
   *                        The <i>json</i> string being decoded.
3600
   *                        </p>
3601
   *                        <p>
3602
   *                        This function only works with UTF-8 encoded strings.
3603
   *                        </p>
3604
   *                        <p>PHP implements a superset of
3605
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3606
   *                        only supports these values when they are nested inside an array or an object.
3607
   *                        </p>
3608
   * @param bool   $assoc   [optional] <p>
3609
   *                        When <b>TRUE</b>, returned objects will be converted into
3610
   *                        associative arrays.
3611
   *                        </p>
3612
   * @param int    $depth   [optional] <p>
3613
   *                        User specified recursion depth.
3614
   *                        </p>
3615
   * @param int    $options [optional] <p>
3616
   *                        Bitmask of JSON decode options. Currently only
3617
   *                        <b>JSON_BIGINT_AS_STRING</b>
3618
   *                        is supported (default is to cast large integers as floats)
3619
   *                        </p>
3620
   *
3621
   * @return mixed
3622
   *                The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3623
   *                null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3624
   *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3625
   *                is deeper than the recursion limit.
3626
   */
3627 24
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // Pass the raw input through the class-level filter before decoding
    // (the docblock states that only UTF-8 input is supported).
    $filteredJson = self::filter($json);

    // Run the capability check if its result has not been recorded yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3644
3645
  /**
3646
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3647
   * Returns the JSON representation of a value.
3648
   *
3649
   * @link http://php.net/manual/en/function.json-encode.php
3650
   *
3651
   * @param mixed $value   <p>
3652
   *                       The <i>value</i> being encoded. Can be any type except
3653
   *                       a resource.
3654
   *                       </p>
3655
   *                       <p>
3656
   *                       All string data must be UTF-8 encoded.
3657
   *                       </p>
3658
   *                       <p>PHP implements a superset of
3659
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3660
   *                       only supports these values when they are nested inside an array or an object.
3661
   *                       </p>
3662
   * @param int   $options [optional] <p>
3663
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3664
   *                       <b>JSON_HEX_TAG</b>,
3665
   *                       <b>JSON_HEX_AMP</b>,
3666
   *                       <b>JSON_HEX_APOS</b>,
3667
   *                       <b>JSON_NUMERIC_CHECK</b>,
3668
   *                       <b>JSON_PRETTY_PRINT</b>,
3669
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3670
   *                       <b>JSON_FORCE_OBJECT</b>,
3671
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3672
   *                       constants is described on
3673
   *                       the JSON constants page.
3674
   *                       </p>
3675
   * @param int   $depth   [optional] <p>
3676
   *                       Set the maximum depth. Must be greater than zero.
3677
   *                       </p>
3678
   *
3679
   * @return string|false
3680
   *                      A JSON encoded <strong>string</strong> on success or<br>
3681
   *                      <strong>FALSE</strong> on failure.
3682
   */
3683 5
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Pass the value through the class-level filter before encoding
    // (the docblock states that all string data must be UTF-8).
    $filteredValue = self::filter($value);

    // Run the capability check if its result has not been recorded yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3700
3701
  /**
3702
   * Checks whether JSON is available on the server.
3703
   *
3704
   * @return bool
3705
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
3706
   */
3707
  public static function json_loaded(): bool
  {
    // JSON support is detected by the presence of the global json_decode() function.
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3711
3712
  /**
3713
   * Makes string's first char lowercase.
3714
   *
3715
   * @param string $str       <p>The input string</p>
3716
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
3717
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3718
   *
3719
   * @return string The resulting string.
3720
   */
3721 46
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character is kept as-is;
    // substr() signals "nothing there" with false, which is mapped to ''.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = ($tail === false) ? '' : $tail;

    // Only the first character is lower-cased.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3736
3737
  /**
3738
   * alias for "UTF8::lcfirst()"
3739
   *
3740
   * @see UTF8::lcfirst()
3741
   *
3742
   * @param string $str
3743
   * @param string $encoding
3744
   * @param bool   $cleanUtf8
3745
   *
3746
   * @return string
3747
   */
3748 2
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3752
3753
  /**
3754
   * Lowercase for all words in the string.
3755
   *
3756
   * @param string   $str        <p>The input string.</p>
3757
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3758
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
3759
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3760
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3761
   *
3762
   * @return string
3763
   */
3764 2
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Lower-cases the first character of every word in $str, except for words
    // listed verbatim in $exceptions; the pieces returned by str_to_words() are
    // concatenated back together without any extra separator.

    // Compare against '' explicitly: a loose "!$str" test would also reject
    // the perfectly valid input "0" and return an empty string for it.
    if ('' === $str) {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip empty pieces, but keep the word "0": it is falsy in PHP and
      // would otherwise silently disappear from the result.
      if (!$word && $word !== '0') {
        continue;
      }

      // Exceptions are matched strictly (case- and type-sensitive).
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3802
3803
  /**
3804
   * alias for "UTF8::lcfirst()"
3805
   *
3806
   * @see UTF8::lcfirst()
3807
   *
3808
   * @param string $str
3809
   * @param string $encoding
3810
   * @param bool   $cleanUtf8
3811
   *
3812
   * @return string
3813
   */
3814 5
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // camelCase alias of lcfirst(); no logic of its own.
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowered;
  }
3818
3819
  /**
3820
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3821
   *
3822
   * @param string $str   <p>The string to be trimmed</p>
3823
   * @param mixed  $chars <p>Optional characters to be stripped</p>
3824
   *
3825
   * @return string The string with unwanted characters stripped from the left.
3826
   */
3827 22
  public static function ltrim(string $str = '', $chars = INF): string
  {
    // Nothing to strip from an empty string.
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // INF is the "no char list given" sentinel. Other empty values (null, '',
    // false, ...) also select the default, but the string "0" must NOT: it is
    // falsy in PHP yet a perfectly valid char list (e.g. stripping leading zeros).
    if ($chars === INF || (!$chars && $chars !== '0')) {
      // Default: strip Unicode separators (\pZ) and "other"/control chars (\pC).
      $pattern = "^[\pZ\pC]+";
    } else {
      // Escape the char list so it is safe inside a character class
      // delimited by "/".
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3843
3844
  /**
3845
   * Returns the UTF-8 character with the maximum code point in the given data.
3846
   *
3847
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3848
   *
3849
   * @return string|null The character with the highest code point than others, returns null on failure or empty input.
3850
   */
3851 2
  public static function max($arg)
  {
    // An array of strings is treated as one concatenated string.
    $input = (\is_array($arg) === true) ? \implode('', $arg) : $arg;

    $codepointList = self::codepoints($input, false);

    // No code points at all: there is no maximum to report.
    if (0 === \count($codepointList)) {
      return null;
    }

    // Convert the highest code point back into its character.
    return self::chr(\max($codepointList));
  }
3866
3867
  /**
3868
   * Calculates and returns the maximum number of bytes taken by any
3869
   * UTF-8 encoded character in the given string.
3870
   *
3871
   * @param string $str <p>The original Unicode string.</p>
3872
   *
3873
   * @return int Max byte lengths of the given chars.
3874
   */
3875 2
  public static function max_chr_width(string $str): int
  {
    // Per-character byte sizes as reported by chr_size_list().
    $sizes = self::chr_size_list($str);

    // \max() must not be called on an empty list; report a width of 0 instead.
    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3884
3885
  /**
3886
   * Checks whether mbstring is available on the server.
3887
   *
3888
   * @return bool
3889
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
3890
   */
3891 27
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    // Side effect: whenever mbstring is present, its internal encoding is
    // switched to UTF-8 as part of this check.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3901
3902
  /**
3903
   * Checks whether mbstring "overloaded" is active on the server.
3904
   *
3905
   * @return bool
3906
   */
3907
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // Without the constant there is no overload bit to test against.
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $overloadFlags = @\ini_get('mbstring.func_overload');

    // True when the "string functions" bit is set in the overload bitmask.
    return (bool)($overloadFlags & MB_OVERLOAD_STRING);
  }
3919
3920
  /**
3921
   * Returns the UTF-8 character with the minimum code point in the given data.
3922
   *
3923
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3924
   *
3925
   * @return string|null The character with the lowest code point than others, returns null on failure or empty input.
3926
   */
3927 2
  public static function min($arg)
  {
    // An array of strings is treated as one concatenated string.
    $input = (\is_array($arg) === true) ? \implode('', $arg) : $arg;

    $codepointList = self::codepoints($input, false);

    // No code points at all: there is no minimum to report.
    if (0 === \count($codepointList)) {
      return null;
    }

    // Convert the lowest code point back into its character.
    return self::chr(\min($codepointList));
  }
3942
3943
  /**
3944
   * alias for "UTF8::normalize_encoding()"
3945
   *
3946
   * @see        UTF8::normalize_encoding()
3947
   *
3948
   * @param mixed $encoding
3949
   * @param mixed $fallback
3950
   *
3951
   * @return mixed
3952
   *
3953
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3954
   */
3955 2
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    // Deprecated camelCase alias; all work happens in normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3959
3960
  /**
3961
   * Normalize the encoding-"name" input.
3962
   *
3963
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3964
   * @param mixed $fallback <p>e.g.: UTF-8</p>
3965
   *
3966
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
3967
   */
3968 340
  public static function normalize_encoding($encoding, $fallback = '')
  {
    // Maps a loosely written encoding name (e.g. "utf8", "latin1", "win-1252")
    // to a canonical name (e.g. "UTF-8", "ISO-8859-1", "WINDOWS-1252").
    // Returns $fallback for empty input; unknown names come back upper-cased.

    // Per-request memo of names that were already resolved.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    // '' and '0' are the falsy strings; '1' / '0' typically come from a bool
    // that was cast to string (only a fallback, for non "strict_types" usage ...)
    if (\in_array($encoding, ['', '0', '1'], true)) {
      return $fallback;
    }

    // Fast paths for the most common spellings (no cache lookup needed).
    if ('UTF-8' === $encoding || 'UTF8' === $encoding) {
      return 'UTF-8';
    }

    if ('8BIT' === $encoding || 'BINARY' === $encoding) {
      return 'CP850';
    }

    if ('HTML' === $encoding || 'HTML-ENTITIES' === $encoding) {
      return 'HTML-ENTITIES';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Lazy-load the list of known encoding names. Static analysis reports that
    // getData() can return false, so anything that is not an array is replaced
    // by an empty list; in_array() below must never receive a non-array.
    if (!\is_array(self::$ENCODINGS)) {
      $knownEncodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($knownEncodings) ? $knownEncodings : [];
    }

    // Already a canonical name: remember it and hand it back unchanged.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Lookup key: upper-cased name without punctuation ("ISO-8859-1" => "ISO88591").
    // preg_replace() returns null on a PCRE error, hence the string cast.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Known alias: use its canonical name; otherwise keep the upper-cased input.
    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the caller's original spelling so the next lookup is a hit.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
4101
4102
  /**
4103
   * Standardize line ending to unix-like.
4104
   *
4105
   * @param string $str
4106
   *
4107
   * @return string
4108
   */
4109 5
  public static function normalize_line_ending(string $str): string
  {
    // Order matters: "\r\n" is replaced first so that a Windows line break
    // becomes one "\n" instead of two.
    $foreignLineEndings = ["\r\n", "\r"];

    return (string)\str_replace($foreignLineEndings, "\n", $str);
  }
4113
4114
  /**
4115
   * Normalize some MS Word special characters.
4116
   *
4117
   * @param string $str <p>The string to be normalized.</p>
4118
   *
4119
   * @return string
4120
   */
4121 40
  public static function normalize_msword(string $str): string
  {
    // Replaces every key of the "utf8_msword" data table found in $str with
    // its mapped value. The input is returned unchanged if the table is
    // unavailable.
    if ('' === $str) {
      return '';
    }

    // The search / replace lists are derived once and reused on later calls.
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      // Static analysis reports that getData() can return false, so anything
      // that is not an array is replaced by an empty table; array_keys() and
      // array_values() below must never receive a non-array.
      if (!\is_array(self::$UTF8_MSWORD)) {
        $replacementTable = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($replacementTable) ? $replacementTable : [];
      }

      $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4142
4143
  /**
   * Normalize the whitespace.
   *
   * Every value of self::$WHITESPACE_TABLE is replaced by a plain space and,
   * unless requested otherwise, every value of
   * self::$BIDI_UNI_CODE_CONTROLS_TABLE is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ('' === $str) {
      return '';
    }

    // one cached search list per value of $keepNonBreakingSpace (0 or 1)
    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4185
4186
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per (input, normalized encoding) pair. The cache is
   * consulted before the input is converted, so repeated calls with a
   * non-UTF-8 encoding do not pay for the conversion again.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character.<br>
   *             If "IntlChar" yields nothing, the leading bytes are decoded manually;
   *             an empty input results in 0.
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    // init
    $chr = (string)$chr;
    $convertInput = false;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert only if the normalized name is still not UTF-8
      $convertInput = ($encoding !== 'UTF-8');
    }

    // the key is built from the unconverted input and the normalized encoding
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($convertInput === true) {
      $chr = self::encode($encoding, $chr);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CHAR_CACHE[$cacheKey] = $code;
      }
    }

    // manual decoding: look at (up to) the first four bytes, 1-indexed by unpack()
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $bytes ? $bytes[1] : 0;

    // four byte sequence
    if (0xF0 <= $code && isset($bytes[4])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    }

    // three byte sequence
    if (0xE0 <= $code && isset($bytes[3])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    }

    // two byte sequence
    if (0xC0 <= $code && isset($bytes[2])) {
      /** @noinspection UnnecessaryCastingInspection */
      return $CHAR_CACHE[$cacheKey] = (int)((($code - 0xC0) << 6) + $bytes[2] - 0x80);
    }

    // single byte (or an incomplete sequence): the first byte is returned as-is
    return $CHAR_CACHE[$cacheKey] = $code;
  }
4255
4256
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never creates variables in the current scope;
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] !== true) {
      // without "mbstring" the native parser is used
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4292
4293
  /**
   * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // An empty pattern matches the empty string exactly once when "/u" compiles;
    // any diagnostics of a failing compilation are suppressed on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return $matched === 1;
  }
4305
4306
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * An empty array is returned if one of the boundaries is empty (falsy) or
   * resolves to the code point 0.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[]
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    $start = self::range_boundary_to_code_point($var1);
    if (!$start) {
      return [];
    }

    $end = self::range_boundary_to_code_point($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }

  /**
   * Resolve one boundary of UTF8::range() into a code point.
   *
   * Decimal digits win over hexadecimal digits; everything else is handed to UTF8::ord().
   *
   * @param mixed $boundary
   *
   * @return int
   */
  private static function range_boundary_to_code_point($boundary): int
  {
    // "ctype" functions are only given strings: non-string arguments are deprecated as of PHP 8.1
    $boundaryAsString = (string)$boundary;

    /** @noinspection PhpComposerExtensionStubsInspection */
    if (\ctype_digit($boundaryAsString)) {
      return (int)$boundary;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    if (\ctype_xdigit($boundaryAsString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
4362
4363
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities,
    // so that the entity decoding below can resolve them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // decode until a pass no longer changes the string (or only once, if requested)
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4411
4412
  /**
   * Filter a list of strings; the kept values are returned re-indexed.
   *
   * @param array    $strings           <p>The strings to filter.</p>
   * @param bool     $removeEmptyValues <p>Drop values which are empty after "trim()".</p>
   * @param int|null $removeShortValues <p>Drop values with a length less than or equal to this number,
   *                                    null disables the check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    // init
    $kept = [];

    foreach ($strings as $candidate) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true
                 &&
                 \trim($candidate) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
4446
4447
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always compiled with the "u" modifier. If "preg_replace()"
   * fails, its null result is cast to an empty string.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to the PCRE modifiers "ms"
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback
    $boundary = $delimiter ?: '/';

    return (string)\preg_replace(
        $boundary . $pattern . $boundary . 'u' . $modifiers,
        $replacement,
        $str
    );
  }
4477
4478
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4493
4494
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM => byte length) is tested against the start
   * of the string, in table order; a matching BOM is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bom => $bomLength) {
      if (self::strpos_in_byte($str, $bom, 0) !== 0) {
        // the string does not start with this BOM
        continue;
      }

      $stripped = self::substr_in_byte($str, $bomLength, $remainingBytes);
      if ($stripped === false) {
        return '';
      }

      $remainingBytes -= $bomLength;
      $str = (string)$stripped;
    }

    return $str;
  }
4523
4524
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what is collapsed into a single $what.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    // any other type (neither string nor array) leaves the string untouched
    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        if ($item === '') {
          // an empty needle can never be duplicated
          continue;
        }

        // The needle is inserted via callback: as a plain "preg_replace()" replacement,
        // needles like '$2' or '\0' would be interpreted as back-references.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4547
4548
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
   *                              Default: '' (strip every tag)</p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4562
4563
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as one break, so it is replaced only once.
   * Only real "<br>" tags (any case, with or without attributes / closing slash)
   * are matched, other tags starting with "br" (e.g. "<brand>") are kept.
   *
   * @param string $str
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" has to be listed before the single "\r" and "\n" alternatives
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
4575
4576
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * The replacement is repeated until nothing matches anymore, so a $replacement
   * which itself contains an invisible character would never terminate.
   *
   * @param string $str
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00", "%1F", ...),
   *                            in upper and lower case.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // a removal can glue a new match together (e.g. "%0%000"), so repeat until stable
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4609
4610
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // everything after the prefix, up to the end of the string
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4633
4634
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (self::str_ends_with($str, $substring)) {

      $strLength = self::strlen($str, $encoding);
      $substringLength = self::strlen($substring, $encoding);

      // no usable length, no cutting: keep the input instead of computing with "false"
      if ($strLength === false || $substringLength === false) {
        return $str;
      }

      // the lengths are measured in $encoding, so the cut has to use it as well
      return (string)self::substr(
          $str,
          0,
          $strLength - $substringLength,
          $encoding
      );
    }

    return $str;
  }
4656
4657
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to UTF8::str_replace() or, for case-insensitive matching, to UTF8::str_ireplace().
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4675
4676
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * Delegates to UTF8::str_replace() or, for case-insensitive matching, to UTF8::str_ireplace().
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4694
4695
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement, an empty string removes the chars.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // "mb_substitute_character()" only accepts a code point or a keyword, never
      // an arbitrary string: invalid bytes are therefore either dropped ("none")
      // or turned into U+FFFD, which is swapped for the real replacement below.
      $substitute = ($replacementChar === '') ? 'none' : 0xFFFD;

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      try {
        \mb_substitute_character($substitute);
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // never leak the changed global setting, not even on errors
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4751
4752
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped. If omitted (or empty), all
   *                      separator (\pZ) and "other" (\pC) characters are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // The string "0" is falsy in PHP, but it is a perfectly valid character to strip.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = '[\pZ\pC]+\z';
    } else {
      $chars = \preg_quote((string)$chars, '/');
      // "\z" only matches at the very end, "$" would also match before a final newline
      $pattern = "[$chars]+\\z";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4776
4777
  /**
   * Build a regex fragment which matches any of the pieces of $s.
   *
   * Pieces of at most two bytes, as well as longer pieces with a length of one,
   * are collected in a bracket expression which starts with $class; all other
   * pieces become alternatives of a non-capturing group. Results are memoized.
   *
   * @param string $s     <p>The pieces, split via UTF8::str_split().</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    $bracketContent = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // a leading "-" is a literal inside of a bracket expression
        $bracketContent = '-' . $bracketContent;
      } elseif (!isset($piece[2])) {
        // at most two bytes
        $bracketContent .= \preg_quote($piece, '/');
      } elseif (1 === self::strlen($piece)) {
        $bracketContent .= $piece;
      } else {
        $alternatives[] = $piece;
      }
    }

    if ($bracketContent) {
      $bracketContent = '[' . $bracketContent . ']';
    }

    if ($alternatives === []) {
      $return = $bracketContent;
    } else {
      $return = '(?:' . \implode('|', \array_merge([$bracketContent], $alternatives)) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4825
4826
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of self::$SUPPORT is echoed as "key - value" inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $output .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4841
4842
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4871
4872
  /**
   * Replace every run of $tabLength spaces by one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces which form one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaceRun = \str_repeat(' ', $tabLength);

    return \str_replace($spaceRun, "\t", $str);
  }
4882
4883
  /**
   * Convert a string to an array of Unicode characters.
   *
   * If an array is given, every element is split on its own (with the same
   * $length and $cleanUtf8) and the array of results is returned.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // hand over all options, so that array elements are treated like single strings
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: decode the byte sequences by hand, based on the lead byte

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: two bytes, the lead byte is dropped if no continuation byte follows
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: three bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx: four bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue the single characters together into chunks of $length characters
      return \array_map(
          static function (array $chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($ret, $length)
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
5015
5016
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // upper-case the character which follows a run of separators, the run itself is dropped
    $upperAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? self::strtoupper($match[1], $encoding) : '';
    };

    // upper-case a run of digits together with the character which follows it
    $upperAfterDigits = function ($match) use ($encoding) {
      return self::strtoupper($match[0], $encoding);
    };

    $camelized = self::lcfirst(self::trim($str), $encoding);
    $camelized = (string)\preg_replace('/^[-_]+/', '', $camelized);
    $camelized = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $camelized);
    $camelized = (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $camelized);

    return $camelized;
  }
5053
5054
  /**
   * Capitalizes the parts of a personal name.
   *
   * Whitespace is collapsed first, then the name parts are processed twice by
   * the capitalize-helper: once split on spaces and once split on hyphens.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $normalized = self::collapse_whitespace($str);

    // space-separated parts first, hyphenated parts afterwards
    foreach ([' ', '-'] as $delimiter) {
      $normalized = self::str_capitalize_name_helper($normalized, $delimiter);
    }

    return $normalized;
  }
5071
5072
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names on $delimiter and upper-cases the first character of every
   * part, except for parts that are listed as special-case names or that start
   * with one of the special-case prefixes. When the delimiter is "-", parts
   * that merely start with one of the special-case names are skipped as well.
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator between the name parts.</p>
   * @param string $encoding  <p>Charset used for the prefix lookups.</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // explode() cannot split on an empty delimiter (warning + false on PHP 7,
    // ValueError on PHP 8), so keep the historic '' result without calling it
    if ($delimiter === '') {
      return '';
    }

    $namesArray = \explode($delimiter, $names);

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // beginnings that prevent capitalization; for hyphenated parts the
    // special-case names count as beginnings too
    $beginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $beginnings = \array_merge($specialCases['names'], $beginnings);
    }

    // iterate by key instead of by reference to avoid a dangling reference
    foreach ($namesArray as $key => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $skip = false;
      foreach ($beginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $skip = true;

          // one matching beginning is enough
          break;
        }
      }

      if ($skip) {
        continue;
      }

      $namesArray[$key] = self::str_upper_first($name);
    }

    return \implode($delimiter, $namesArray);
  }
5160
5161
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * An empty $haystack or an empty $needle always results in false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param string      $needle        <p>Substring to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is used as the encoding (backward compatibility)
   *                                   and the search is then case-sensitive unless the value is falsy.</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5190
5191
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty $haystack or an empty $needles array always results in false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param array       $needles       <p>SubStrings to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is used as the encoding (backward compatibility).</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains every needle.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needles === []) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
5226
5227
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * Every needle is checked via "str_contains()"; the first hit ends the search.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains at least one needle.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($needles === []) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === true) {
        return true;
      }
    }

    return false;
  }
5254
5255
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * This is "str_delimit()" with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5269
5270
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every uppercase letter inside a word with a dash; "\p{Lu}" also
    // covers non-ASCII capitals (e.g. "Σ", "Ä"), which "[A-Z]" would miss
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // the delimiter is inserted after lower-casing, so it keeps its own case
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
  }
5292
5293
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32 only),
   * ASCII, UTF-8, "mb_detect_encoding()" (if mbstring is available) and
   * finally an "iconv()" round-trip against the known encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      $isUtf16 = self::is_utf16($str, false);
      if ($isUtf16 === 1) {
        return 'UTF-16LE';
      }
      if ($isUtf16 === 2) {
        return 'UTF-16BE';
      }

      $isUtf32 = self::is_utf32($str, false);
      if ($isUtf32 === 1) {
        return 'UTF-32LE';
      }
      if ($isUtf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      $encodings = self::getData('encodings');

      // the data lookup is reported as possibly returning false; fall back to
      // an empty list so the loop below never iterates over a non-array
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5419
5420
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is byte-wise and case-sensitive; an empty $haystack or an
   * empty $needle always results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $needleLength = \strlen($needle);

    // a needle that is longer than the haystack can never be its ending
    if ($needleLength > \strlen($haystack)) {
      return false;
    }

    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
5436
5437
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5461
5462
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    // already starts with the substring -> nothing to do
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
5479
5480
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    // already ends with the substring -> nothing to do
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
5496
5497
  /**
   * Capitalizes the first word of the string, replaces underscores with
   * spaces, and strips '_id'.
   *
   * The '_id' replacement runs before the underscore replacement, then the
   * result is trimmed and its first character is upper-cased.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // search => replacement, applied in this order
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $str = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    $trimmed = self::trim($str);

    return self::ucfirst($trimmed);
  }
5521
5522
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of $haystack is cut off by the byte-length of $needle and then
   * compared with $needle via "strcasecmp()". An empty $haystack or an empty
   * $needle always results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5542
5543
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5567
5568
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper around "stripos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::stripos($str, $needle, $offset, $encoding);
  }
5590
5591
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper around "strripos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strripos($str, $needle, $offset, $encoding);
  }
5614
5615
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper around "strpos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strpos($str, $needle, $offset, $encoding);
  }
5637
5638
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper around "strrpos()" of this class.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strrpos($str, $needle, $offset, $encoding);
  }
5661
5662
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged if $index is greater than its length or
   * if the length cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // the length lookup is reported as possibly returning false; without a
    // valid length there is no meaningful insert position
    if ($len === false || $index > $len) {
      return $str;
    }

    $start = self::substr($str, 0, $index, $encoding);
    $end = self::substr($str, $index, $len, $encoding);

    // a failed substr() (false) is treated as an empty part
    return ($start === false ? '' : $start) . $substring . ($end === false ? '' : $end);
  }
5685
5686
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive unicode regex
   * and the replacement itself is done via "preg_replace()".
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = \array_map(
        function ($term) {
          $term = (string)$term;

          // an empty search term gets a pattern that can never match
          if ($term === '') {
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($term, '/') . '/ui';
        },
        (array)$search
    );

    $replaced = 0;
    $result = \preg_replace($patterns, $replace, $subject, -1, $replaced);

    // hand the number of replacements back via the reference parameter
    $count = $replaced;

    return $result;
  }
5729
5730
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::stripos($haystack, $needle) === 0;
  }
5750
5751
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5779
5780
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used to cut the string below,
    // otherwise the offset and the substring could disagree
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5811
5812
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used to cut the string below,
    // otherwise the offset and the substring could disagree
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5843
5844
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used to cut the string below,
    // otherwise the offset and the substring could disagree
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5870
5871
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is searched case-insensitively. An empty string is returned
   * if $str or $separator is empty, or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used to cut the string below,
    // otherwise the offset and the substring could disagree
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5897
5898
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to "stristr()" of this class; an empty string is returned if
   * $str or $needle is empty, or if that lookup yields false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5930
5931
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * Thin wrapper around UTF8::strrichr() that returns an empty string instead
   * of false, and short-circuits on empty input.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The matched part, or an empty string if nothing was found.</p>
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5958
5959
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The trailing characters; an empty string when $n is not positive.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // A negative start counts from the end of the string.
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5978
5979
  /**
   * Limit the number of characters in a string.
   *
   * Strings that already fit into $length are returned untouched. Longer
   * strings are cut so that the kept part plus $strAddOn is $length characters
   * long. The add-on itself is never shortened, so if it is longer than
   * $length the result is just the add-on.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the input, and never let the
    // kept length go negative: a negative length would make substr() strip
    // characters from the END of the string and return far more than $length.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    // substr() may return false; cast so the concatenation is always string-safe.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
6005
6006
  /**
   * Limit the number of characters in a string, cutting at a word boundary
   * (a space) where one exists inside the limit.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The cut falls exactly on a space: drop that space and append the add-on.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // Otherwise cut at $length and throw away the trailing (possibly partial) word.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // No usable word boundary inside the limit: fall back to a hard cut.
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
6047
6048
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared prefix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlapping length of both strings can be shared.
    $maxLength = \min((int)self::strlen($str, $encoding), (int)self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = (string)self::substr($str, $i, 1, $encoding);

      // Strict comparison on string-cast characters: no type juggling involved.
      if ($char !== (string)self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
6074
6075
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if there is none.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    $len = 0; // length of the best run found so far
    $end = 0; // 1-based index in $str where that run ends

    // $table[$i][$j] holds the length of the common run that ends at
    // character $i of $str and character $j of $otherStr (zero-initialised).
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // Does not depend on $j, so extract it once per row.
      $strChar = (string)self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar !== (string)self::substr($otherStr, $j - 1, 1, $encoding)) {
          // Mismatch: the cell keeps its initial value of 0.
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6126
6127
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string <p>The shared suffix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlapping length of both strings can be shared.
    $maxLength = \min((int)self::strlen($str, $encoding), (int)self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // Negative start: walk backwards from the end of both strings.
      $char = (string)self::substr($str, -$i, 1, $encoding);

      // Strict comparison on string-cast characters: no type juggling involved.
      if ($char !== (string)self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
6153
6154
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u"
   * modifier, so it must be passed without delimiters.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against.</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match and 0 / false otherwise.
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
6170
6171
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // Non-negative offsets are zero-based; negative ones count back from the
    // end, so -$length still addresses the first character.
    return $offset >= 0
        ? $length > $offset
        : $length >= \abs($offset);
  }
6194
6195
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Measure in the caller's encoding so the bounds check agrees with the
    // char_at() lookup below (and with str_offset_exists()).
    $length = self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6224
6225
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * An empty $str is returned as an empty string, and the input is returned
   * unchanged when $pad_length is not larger than its length or when
   * $pad_string is empty.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string.
   *
   * @throws \InvalidArgumentException If $pad_type is neither an integer nor one of the accepted strings.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the string aliases into the native STR_PAD_* constants.
    // Strict comparison, so values such as true or 0.0 cannot be mistaken
    // for 'left' through type juggling.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type === 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type === 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type === 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // Nothing to pad with; also avoids the division by zero below.
      return $str;
    }

    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // For an odd $diff the right side gets the extra character.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
6296
6297
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. For an odd amount of padding the extra character
   * goes to the right side.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $totalPadding = $length - self::strlen($str, $encoding);

    $leftPadding = (int)\floor($totalPadding / 2);
    $rightPadding = (int)\ceil($totalPadding / 2);

    return self::apply_padding($str, $leftPadding, $rightPadding, $padStr, $encoding);
  }
6314
6315
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure in the caller's encoding, as str_pad_both() does, so the amount
    // of padding matches what apply_padding() works with.
    $padding = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $padding, 0, $padStr, $encoding);
  }
6330
6331
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure in the caller's encoding, as str_pad_both() does, so the amount
    // of padding matches what apply_padding() works with.
    $padding = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $padding, $padStr, $encoding);
  }
6346
6347
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           The multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6371
6372
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed    $search  <p>
   *                          The value being searched for, otherwise known as the needle.
   *                          An array may be used to designate multiple needles.
   *                          </p>
   * @param mixed    $replace <p>
   *                          The replacement value that replaces found search
   *                          values. An array may be used to designate multiple replacements.
   *                          </p>
   * @param mixed    $subject <p>
   *                          The string or array being searched and replaced on,
   *                          otherwise known as the haystack.
   *                          </p>
   *                          <p>
   *                          If subject is an array, then the search and
   *                          replace is performed with every entry of
   *                          subject, and the return value is an array as
   *                          well.
   *                          </p>
   * @param int|null $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Pure delegation; $count is filled by reference by the native function.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6405
6406
  /**
   * Replaces $search with $replacement where it occurs at the beginning of the string.
   *
   * The search term is escaped with preg_quote() and anchored with "^", so it
   * is matched literally and only at the start.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = '^' . \preg_quote($search, '/');

    // Double every backslash in the replacement before handing it on —
    // presumably so regex_replace() treats it as a literal character.
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6423
6424
  /**
   * Replaces $search with $replacement where it occurs at the end of the string.
   *
   * The search term is escaped with preg_quote() and anchored with "$", so it
   * is matched literally and only at the end.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = \preg_quote($search, '/') . '$';

    // Double every backslash in the replacement before handing it on —
    // presumably so regex_replace() treats it as a literal character.
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
6441
6442
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text to put in its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the first match replaced, or unchanged if nothing matched.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $pos = self::strpos($subject, $search);
    if ($pos === false) {
      // Term not present: nothing to replace.
      return $subject;
    }

    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
6460
6461
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text to put in its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the last match replaced, or unchanged if nothing matched.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $pos = self::strrpos($subject, $search);
    if ($pos === false) {
      // Term not present: nothing to replace.
      return $subject;
    }

    return self::substr_replace($subject, $replace, $pos, self::strlen($search));
  }
6479
6480
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // Without this guard range(0, -1) would produce the bogus index list
    // [0, -1] for an empty string.
    if ('' === $str) {
      return '';
    }

    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // Rebuild the string by picking one character per shuffled index.
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6502
6503
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $start or $end is negative, it is computed from
   * the end of the string.
   *
   * @param string   $str
   * @param int      $start    <p>Initial index from which to begin extraction.</p>
   * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string   $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring; an empty string when $end does not lie
   *                     after $start.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // Measure in the caller's encoding so the indexes agree with substr() below.
    $strLength = (int)self::strlen($str, $encoding);

    if ($end === null) {
      // No end given: take everything from $start onwards.
      return self::substr($str, $start, $strLength, $encoding);
    }

    // Resolve negative indexes against the string length.
    $startIndex = $start < 0 ? \max(0, $strLength + $start) : $start;
    $endIndex = $end < 0 ? $strLength + $end : $end;

    // An end at or before the start is an empty slice. Checking this here
    // also keeps a negative length from reaching substr(), where it would
    // mean "omit that many characters from the end" instead.
    if ($endIndex <= $startIndex) {
      return '';
    }

    return self::substr($str, $startIndex, $endIndex - $startIndex, $encoding);
  }
6532
6533
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    // Put "_" around digits and in front of (lowercased) capitals.
    // NOTE(review): inside the character class "|" is a literal pipe, so pipes
    // are matched too — kept as-is to preserve the current output.
    $markBoundary = function ($matches) use ($encoding) {
      $char = $matches[1];

      // The round-trip through int only survives for a plain digit.
      if ((string)(int)$char === $char) {
        return '_' . $char . '_';
      }

      return '_' . UTF8::strtolower($char, $encoding);
    };
    $str = (string)\preg_replace_callback('/([\d|A-Z])/u', $markBoundary, $str);

    // Applied in order: pattern => replacement.
    $cleanup = [
        '/\s+/'       => '_', // convert spaces to "_"
        '/^\s+|\s+$/' => '',  // trim leading & trailing spaces
        '/_+/'        => '_', // remove double "_"
    ];
    $str = (string)\preg_replace(\array_keys($cleanup), \array_values($cleanup), $str);

    // Trim leading & trailing "_" first, then leading & trailing whitespace.
    return self::trim(self::trim($str, '_'));
  }
6580
6581
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice drops duplicate values.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6606
6607
  /**
   * Alias for "UTF8::split()".
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str <p>The value handed on to UTF8::split().</p>
   * @param int             $len <p>Handed on to UTF8::split() unchanged. Default: 1</p>
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    // Pure delegation; both arguments are passed through as-is.
    $chunks = self::split($str, $len);

    return $chunks;
  }
6621
6622
  /**
   * Splits the string at every occurrence of $pattern and returns the pieces
   * as an array of strings. An optional integer $limit will truncate the
   * results.
   *
   * Note: $pattern is escaped with preg_quote() before it is used, so it is
   * matched literally rather than interpreted as a regular expression.
   *
   * @param string $str
   * @param string $pattern <p>The separator at which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // An empty pattern cannot split anything: return the input as one piece.
    if ($pattern === '') {
      return [$str];
    }

    // With a limit, preg_split() puts the unsplit remainder into the last
    // element, so ask for one extra piece and drop it again below.
    $pregLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pregLimit);
    if ($parts === false) {
      return [];
    }

    if ($pregLimit > 0 && \count($parts) === $pregLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6665
6666
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is case-sensitive; an empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Compare only the leading bytes instead of searching the whole haystack.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6686
6687
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    // The first matching candidate settles it.
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6715
6716
  /**
   * Return everything that follows the first occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty,
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // start right behind the separator and read up to the end of the string
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6747
6748
  /**
   * Return everything that follows the last occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty,
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // start right behind the separator and read up to the end of the string
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $start, null, $encoding);
  }
6779
6780
  /**
   * Return everything that precedes the first occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty,
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // the separator position doubles as the length of the leading part
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6811
6812
  /**
   * Return everything that precedes the last occurrence of $separator.
   *
   * An empty string is returned when $str or $separator is empty,
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // the separator position doubles as the length of the leading part
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6843
6844
  /**
   * Return the part of $str selected by UTF8::strstr() for the first occurrence of "$needle"
   * (the part before the needle when "$beforeNeedle" is true).
   *
   * An empty string is returned when $str or $needle is empty, or when UTF8::strstr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // "false" means "needle not found" and is mapped onto an empty string
    if ($found === false) {
      return '';
    }

    return $found;
  }
6876
6877
  /**
   * Return the part of $str selected by UTF8::strrchr() for the last occurrence of "$needle"
   * (the part before the needle when "$beforeNeedle" is true).
   *
   * An empty string is returned when $str or $needle is empty, or when UTF8::strrchr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // "false" means "needle not found" and is mapped onto an empty string
    if ($found === false) {
      return '';
    }

    return $found;
  }
6904
6905
  /**
   * Wrap $str in $substring, i.e. put $substring in front of and behind it.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6917
6918
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   *
   * A "word" is every run of non-whitespace characters. Each word is lower-cased
   * and then gets an upper-case first character, unless it is listed (exact,
   * case-sensitive match) in $ignore, in which case it is left untouched.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    $str = (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // use the same encoding for both steps, otherwise the upper-casing
          // silently falls back to the default encoding
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );

    return $str;
  }
6947
6948
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Processing steps (all regex based):
   * 1. trim the input; if it contains no lower-case character at all, lower-case it first
   * 2. capitalize ordinary words, lower-case "small words", keep URLs / paths / words with internal caps
   * 3. re-capitalize small words at the start / end of the title or of a sub-phrase
   * 4. re-capitalize small words inside two-part hyphenated compounds
   *
   * Adapted from John Gruber's script.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // Built-in "small words", written as regex fragments.
    // NOTE: the $ignore entries are appended to the same alternation without
    // any quoting, so they are interpreted as regex fragments as well.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // an input without any lower-case character (e.g. "ALL CAPS") is lower-cased first,
    // with the requested encoding, so that the rules below can apply
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the inline regex comment below speaks of a hyphen, but the
    // lookbehind actually tests for the character "…" - confirm against the
    // referenced script before changing the pattern.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7092
7093
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of $str are read as one big-endian number and written in base 2,
   * without leading zeros ("0" for an empty string or for zero-bytes only).
   *
   * The conversion is done per hex digit: feeding the complete hex string into
   * base_convert() loses precision for longer inputs, because base_convert()
   * works with floating point numbers internally.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    if ('' === $str) {
      return '0';
    }

    $bits = '';
    foreach (\str_split(\bin2hex($str)) as $hexDigit) {
      // one hex digit == exactly four bits
      $bits .= \str_pad(\base_convert($hexDigit, 16, 2), 4, '0', \STR_PAD_LEFT);
    }

    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7106
7107
  /**
   * Split a string into lines.
   *
   * The string is split on every run of one or two "\r" / "\n" characters.
   * If one of the filter options is active, the result is passed through
   * UTF8::reduce_string_array().
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for "nothing to split" and for a failing regex
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
      return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    $filterRequested = $removeEmptyValues !== false || $removeShortValues !== null;
    if (!$filterRequested) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
7142
7143
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the matched words
   * are part of the result, together with the text found between them.
   * If one of the filter options is active, the result is passed through
   * UTF8::reduce_string_array() and every remaining entry is cast to string.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ($str === '') {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $filterRequested = $removeEmptyValues !== false || $removeShortValues !== null;
    if (!$filterRequested) {
      return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    // make sure that only strings are handed back
    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
7187
7188
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7203
7204
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * If $substring alone is already longer than a (non-negative) $length,
   * nothing of $str is kept and only $substring is returned.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Need to further trim the string so we can append the substring
    $keep = $length - self::strlen($substring, $encoding);

    // A substring longer than the requested length must not turn the length
    // negative: a negative length would cut from the END of the string and
    // produce a result that is longer than requested.
    // (a negative $length passed in by the caller is left as it is)
    if ($length >= 0 && $keep < 0) {
      $keep = 0;
    }

    // substr() may report false, the cast maps that onto an empty string
    return (string)self::substr($str, 0, $keep, $encoding) . $substring;
  }
7237
7238
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // reserve room for the substring that gets appended
    $length -= self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $length, $encoding);
    if ($head === false) {
      return '';
    }

    // Is the cut directly in front of a space? If not, the last word was split.
    // (loose comparison kept on purpose: "false" compares equal to position 0)
    $nextSpacePos = self::strpos($str, ' ', $length - 1, $encoding);
    if ($nextSpacePos != $length) {
      // cut back to the last space inside of the truncated part
      $lastSpacePos = self::strrpos($head, ' ', 0, $encoding);

      if ($lastSpacePos !== false || $nextSpacePos !== false) {
        $head = self::substr($head, 0, (int)$lastSpacePos, $encoding);
      }
    }

    return $head . $substring;
  }
7281
7282
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * Shortcut for UTF8::str_delimit() with "_" as delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $underscored = self::str_delimit($str, '_');

    return $underscored;
  }
7296
7297
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * Implemented as UTF8::str_camelize() followed by UTF8::str_upper_first().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
7311
7312
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7327
7328
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The work is delegated to UTF8::str_to_words(): in its result the words
   * are read from the odd indexes, the text between them from the even ones.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // plain list of words
      $words = [];
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      // words, keyed by their offset in the input string
      $words = [];
      $wordOffset = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[$wordOffset] = $parts[$idx];
        $wordOffset += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // every word is framed by two non-word parts
    return (int)(($partCount - 1) / 2);
  }
7371
7372
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * Both strings are passed through UTF8::strtocasefold() first and are
   * compared via UTF8::strcmp() afterwards.
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7393
7394
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7411
7412
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are reported as equal right away. Otherwise both strings
   * are normalized (NFD) before they are compared byte-wise, so that different
   * Unicode representations of the same text compare as equal.
   *
   * If a string can not be normalized (Normalizer::normalize() reports false,
   * e.g. for invalid UTF-8), the raw string is compared instead.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "false" must not reach \strcmp(): with strict types that is a TypeError
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7431
7432
  /**
   * Find length of initial segment not matching mask.
   *
   * If $offset or $length is given, only that part of $str (taken via
   * UTF8::substr()) is inspected.
   *
   * @param string $str
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   *                  The length of the initial segment (the length of the whole string, if
   *                  none of the characters was found),<br> or <strong>null</strong> if $charList
   *                  or the inspected string is empty or if the length can not be determined.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // use a dedicated variable for the regex result, instead of recycling the "int $length" parameter
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      $segmentLength = self::strlen($matches[1]);
    } else {
      $segmentLength = self::strlen($str);
    }

    // NOTE(review): strlen() presumably reports false for input it can not handle - keep the documented "int|null"
    return $segmentLength === false ? null : $segmentLength;
  }
7466
7467
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7484
7485
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted via UTF8::chr() and the results are joined
   * in the given order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
7507
7508
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is compared against every key of the "self::$BOM" list.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the keys (the BOM strings) are of interest here, not the values
    foreach (self::$BOM as $bom => $unusedByteLength) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7527
7528
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Pass the string through UTF8::clean() before stripping.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7560
7561
  /**
   * Remove every whitespace character from the string.
   *
   * Tabs and newlines are removed, and so is multibyte whitespace
   * such as the thin space and the ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input without any whitespace.
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $withoutWhitespace = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on error; the cast turns that into an empty string
    return (string)$withoutWhitespace;
  }
7578
7579
  /**
   * Case-insensitive search for the position of the first occurrence of a string within another.
   *
   * The lookup is tried via "mbstring" first, then via "intl" (UTF-8 with a non-negative offset only),
   * then via the native function for pure ASCII input and finally by case-folding both
   * strings and delegating to UTF8::strpos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found
   *                   (also for an empty haystack or needle).
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would make
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2) intl: "grapheme_stripos()" can't handle other encodings or a negative offset
    //

    $intlIsUsable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($intlIsUsable) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3) ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // 4) vanilla php: case-fold both sides, then run the case-sensitive search
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7652
7653
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive).
   *
   * The work is done by "mbstring" when available; otherwise "intl" (UTF-8 only), the native
   * function (ASCII only) and finally a regular expression are used as fallbacks.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The clean-up above may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" check would also treat the valid needle "0" as empty
    // and wrongly return the whole haystack.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first match
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // self::strlen() may return false; the cast keeps the "int $offset" contract of
    // self::substr() intact under strict types (false becomes offset 0).
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7731
7732
  /**
   * Count the characters of a string (not its bytes).
   *
   * "CP850" / "ASCII" input is measured in bytes. Everything else is measured by the first
   * strategy that delivers a result: mbstring, iconv, intl (UTF-8 only), the native function
   * (ASCII content only) or, as a last resort, a regular expression.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: one byte is one character
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // invalid characters would make "mb_strlen" and "\iconv_strlen" report a wrong length
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = $encoding === 'UTF-8';

    if (
        !$isUtf8
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 2) iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 3) intl: "grapheme_strlen()" can't handle other encodings
    //

    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    //
    // 4) ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // 5) vanilla php: count the matches of "any single character"
    //

    \preg_match_all('/./us', $str, $matches);
    $length = \count($matches[0]);

    // nothing matched although the string is not empty: the length can't be determined
    if ($length === 0 && isset($str[0])) {
      return false;
    }

    return $length;
  }
7847
7848
  /**
   * Get the length of a string in bytes.
   *
   * If the mbstring function overload is active, "mb_strlen()" with the 8-bit
   * "CP850" charset is used instead of "strlen()".
   *
   * @param string $str <p>The string being measured.</p>
   *
   * @return int The number of bytes.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it ... (CP850 = 8-BIT)
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
7872
7873
  /**
   * Case-insensitive "natural order" string comparison.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are case-folded and then handed to UTF8::strnatcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
    $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($foldedFirst, $foldedSecond);
  }
7894
7895
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7914
7915
  /**
   * Compare the first n characters of two strings, ignoring case.
   *
   * Both strings are case-folded and then handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
    $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($foldedFirst, $foldedSecond, $len);
  }
7938
7939
  /**
   * Compare the first n characters of two strings.
   *
   * Both strings are cut to $len characters and then handed to UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a failed self::substr() (false) into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7960
7961
  /**
   * Search a string for the first occurrence of any character out of a set.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also for an empty haystack or char_list).
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $charClassPattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($charClassPattern, $haystack, $m)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    $bytePosition = (int)\strpos($haystack, $m[0]);

    return \substr($haystack, $bytePosition);
  }
7983
7984
  /**
   * Find the position of the first occurrence of a string within another string.
   *
   * "CP850" / "ASCII" input is searched byte-wise. Everything else is searched by the first
   * strategy that finds the needle: mbstring, intl (UTF-8 with a non-negative offset only),
   * iconv (non-negative offset only), the native function (ASCII content only) or,
   * as a last resort, a byte search whose result is converted into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found it returns false
   *                   (also for an empty haystack or needle).
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support integer $needle:
    // a non-negative integer is converted via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would make
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2) intl: "grapheme_strpos()" can't handle other encodings or a negative offset
    //

    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $position = \grapheme_strpos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3) iconv: "iconv_strpos()" can't handle a negative offset
    //

    if ($offset >= 0 && self::$SUPPORT['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $position = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 4) ascii only
    //

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // 5) vanilla php: search byte-wise behind the offset, then count the characters in front of the hit
    //

    $remainder = $haystackIsAscii
        ? \substr($haystack, $offset)
        : self::substr($haystack, $offset, null, $encoding);
    if ($remainder === false) {
      $remainder = '';
    }
    $haystack = (string)$remainder;

    if ($offset < 0) {
      $offset = 0;
    }

    $bytePosition = \strpos($haystack, $needle);
    if ($bytePosition === false) {
      return false;
    }

    if ($bytePosition === 0) {
      // the hit is right at the offset
      return $offset + 0;
    }

    $charsBeforeHit = self::strlen(\substr($haystack, 0, $bytePosition), $encoding);

    return $offset + $charsBeforeHit;
  }
8134
8135
  /**
   * Find the byte position of the first occurrence of a string within another string.
   *
   * If the mbstring function overload is active, "mb_strpos()" with the 8-bit
   * "CP850" charset is used instead of "strpos()".
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found (or haystack / needle is empty),
   *                   it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it ... (CP850 = 8-BIT)
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
  }
8168
8169
  /**
   * Find the last occurrence of a character in a string and return the matching portion of that string.
   *
   * The work is done by "mbstring" when available. Without it, the native function is used for
   * "CP850" / "ASCII" (only if $before_needle is false); otherwise the first character of the
   * needle is located via iconv or UTF8::strrpos() and the haystack is sliced at that position.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would make
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // 2) binary || ascii only
    //

    $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($before_needle === false && $isByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // 3) iconv / vanilla php: both search for the first character of the needle only
    //

    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
      return false;
    }
    $needle = (string)$firstNeedleChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPosition = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPosition = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPosition === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPosition, $encoding)
        : self::substr($haystack, $lastPosition, null, $encoding);
  }
8282
8283
  /**
   * Reverses characters order in the string.
   *
   * A string that can be split by a UTF-8 aware regular expression is reversed in a single pass.
   * If that split fails (e.g. for invalid UTF-8), the characters are collected one by one
   * from the end of the string via UTF8::strlen() / UTF8::substr().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Fast path: one linear pass instead of one self::substr() call per character.
    // preg_match_all() returns false when the subject is not valid UTF-8.
    if (\preg_match_all('/./us', $str, $chars) > 0) {
      return \implode('', \array_reverse($chars[0]));
    }

    // Slow path: walk backwards character by character.
    // self::strlen() may return false; the cast keeps $i an integer for self::substr().
    $reversed = '';
    $i = (int)self::strlen($str);
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
8304
8305
  /**
   * Find the last occurrence of a character in a string (case-insensitive)
   * and return the matching portion of that string.
   *
   * The work is done by "mbstring" when available; otherwise the first character of the
   * needle is located via UTF8::strripos() and the haystack is sliced at that position.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would make
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // 2) vanilla php: only the first character of the needle is searched for
    //

    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
      return false;
    }
    $needle = (string)$firstNeedleChar;

    $lastPosition = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPosition === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPosition, $encoding)
        : self::substr($haystack, $lastPosition, null, $encoding);
  }
8375
8376
  /**
   * Locate the last occurrence of a needle inside a haystack, ignoring case.
   *
   * The lookup is delegated to the first usable backend, in this order:
   * byte-wise search (CP850 / ASCII encodings), mbstring, intl (grapheme),
   * byte-wise search for pure ASCII input and finally a case-folded
   * self::strrpos() call.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for. A non-negative integer is converted via self::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring only accept string needles, so an integer needle is converted first
    $needle = ($needle === (int)$needle && $needle >= 0)
        ? (string)self::chr($needle)
        : (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: plain byte search is enough
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // next backend: intl, "grapheme_strripos()" handles neither other encodings nor a negative offset
    if (
        self::$SUPPORT['intl'] === true
        &&
        $encoding === 'UTF-8'
        &&
        $offset >= 0
    ) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // pure ascii input can be searched byte-wise
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    // last resort: case-fold both strings and run the case-sensitive search
    return self::strrpos(
        self::strtocasefold($haystack, true, false, $encoding),
        self::strtocasefold($needle, true, false, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8481
8482
  /**
   * Byte-wise, case-insensitive search for the last occurrence of a string within another.
   *
   * Uses mb_strripos() with the 8-bit "CP850" encoding when mbstring function
   * overloading is active, otherwise the native strripos().
   *
   * @param string $haystack <p>
   *                         The string from which to get the position of the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack
   *                         to start searching.
   *                         </p>
   *
   * @return int|false Return the numeric position of the last occurrence of needle in the
   *                   haystack string, or false if needle is not found (or one of the inputs is empty).
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
  }
8517
8518
  /**
   * Find position of last occurrence of a string in a string.
   *
   * The lookup is delegated to the first usable backend, in this order:
   * byte-wise search (CP850 / ASCII encodings), mbstring, intl (grapheme),
   * byte-wise search for pure ASCII input and finally a vanilla PHP fallback.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is passed on as 0 to the
   *                              byte-wise and mbstring backends.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The callees below declare a plain "int" offset: with strict_types enabled
    // a null would raise a TypeError, so the default is mapped to 0 for them.
    $offsetAsInt = $offset ?? 0;

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offsetAsInt);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offsetAsInt, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // Cut the haystack down to the searchable part and remember how many
    // characters were skipped in front, so the result can be shifted back.
    $skippedChars = 0;
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
      $skippedChars = $offset;
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position
    return $skippedChars + (int)self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8653
8654
  /**
   * Byte-wise search for the last occurrence of a string in a string.
   *
   * Uses mb_strrpos() with the 8-bit "CP850" encoding when mbstring function
   * overloading is active, otherwise the native strrpos().
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found (or one of the inputs is empty), it returns false.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
  }
8688
8689
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * When an offset or a length is given, only that part of the string
   * (cut out via self::substr()) is inspected.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional]
   * @param int|null $length [optional]
   *
   * @return int The length of the leading segment, 0 if the string or the mask is empty or nothing matches.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // Collect the match in its own variable instead of overwriting the subject string.
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // self::strlen() may report false; the declared return type requires an int.
    return (int)self::strlen($matches[0]);
  }
8716
8717
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * The lookup is delegated to the first usable backend, in this order:
   * byte-wise search (CP850 / ASCII encodings), mbstring, intl (grapheme),
   * byte-wise search for pure ASCII input and finally a regex based fallback.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // binary || ascii only: plain byte search is enough
    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // next backend: intl, "grapheme_strstr()" can't handle other encodings
    if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
      $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($graphemeResult !== false) {
        return $graphemeResult;
      }
    }

    // pure ascii input can be searched byte-wise
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    // last resort: capture everything in front of the first needle via regex
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    $beforeNeedle = $match[1] ?? null;
    if ($beforeNeedle === null) {
      return false;
    }

    return $before_needle
        ? $beforeNeedle
        : self::substr($haystack, self::strlen($beforeNeedle));
  }
8820
8821
  /**
   * Byte-wise search for the first occurrence of a string within another.
   *
   * Uses mb_strstr() with the 8-bit "CP850" encoding when mbstring function
   * overloading is active, otherwise the native strstr().
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end.
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found (or one of the inputs is empty).
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
  }
8860
8861
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through self::fixStrCaseHelper() and then
   * lower-cased or upper-cased, depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict boolean true selects the lowercase variant
    return $lower === true
        ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
  }
8900
8901
  /**
   * Make a string lowercase.
   *
   * Without a language the conversion is done by mb_strtolower(); with a
   * language the intl transliterator ("<lang>-Lower") is used when available.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid UTF-8 sequences before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    // no special language requested: always fallback via symfony polyfill
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the generic lowercase transliterator
      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
8963
8964
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (Unicode NFD) and all non-spacing marks (\p{Mn})
   * are removed afterwards. If the string cannot be normalized, it is
   * returned unchanged.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // Normalizer::normalize() does not return a string on failure; passing that on
    // to preg_replace() would raise a TypeError under strict_types.
    if (!\is_string($decomposed)) {
      return $str;
    }

    // preg_replace() returns null on error, which would violate the string return type
    return \preg_replace('/\p{Mn}+/u', '', $decomposed) ?? $decomposed;
  }
8976
8977
  /**
   * Make a string uppercase.
   *
   * Without a language the conversion is done by mb_strtoupper(); with a
   * language the intl transliterator ("<lang>-Upper") is used when available.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid UTF-8 sequences before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, the transliterator just does not know this language
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic uppercase transliterator
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
9039
9040
  /**
   * Translate characters or replace sub-strings.
   *
   * With both $from and $to given, the two values are split via self::str_split(),
   * cut to the same number of entries and used as a character map.
   * Without $to, a string $from is removed from the input and any other
   * $from is handed to the native strtr() as replacement pairs.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // identical source and target: nothing to translate
    if ($to === $from) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);
      $fromCount = \count($fromChars);
      $toCount = \count($toChars);

      // cut the longer list, so both lists have the same size
      if ($fromCount > $toCount) {
        $fromChars = \array_slice($fromChars, 0, $toCount);
      } elseif ($fromCount < $toCount) {
        $toChars = \array_slice($toChars, 0, $fromCount);
      }

      $from = \array_combine($fromChars, $toChars);
    }

    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
9084
9085
  /**
   * Return the width of a string.
   *
   * mb_strwidth() is used when mbstring is available. Otherwise every code point
   * matched by the wide-character ranges below counts as 2 and every other
   * character as 1.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    // vanilla php: the pattern below only works on UTF-8 input
    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip the wide characters and let preg_replace() count them
    $wideCount = 0;
    $narrowChars = (string)preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    // each wide character takes two columns, the remaining ones a single column
    return ($wideCount * 2) + self::strlen($narrowChars, 'UTF-8');
  }
9135
9136
  /**
9137
   * Get part of a string.
9138
   *
9139
   * @link http://php.net/manual/en/function.mb-substr.php
9140
   *
9141
   * @param string $str       <p>The string being checked.</p>
9142
   * @param int    $offset    <p>The first position used in str.</p>
9143
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9144
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9145
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9146
   *
9147
   * @return string|false
9148
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9149
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9150
   *                      characters long, <b>FALSE</b> will be returned.
9151
   */
9152 402
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9153
  {
9154 402
    if ('' === $str) {
9155 19
      return '';
9156
    }
9157
9158
    // Empty string
9159 396
    if ($length === 0) {
9160 20
      return '';
9161
    }
9162
9163 393
    if ($cleanUtf8 === true) {
9164
      // iconv and mbstring are not tolerant to invalid encoding
9165
      // further, their behaviour is inconsistent with that of PHP's substr
9166 2
      $str = self::clean($str);
9167
    }
9168
9169
    // Whole string
9170 393
    if (!$offset && $length === null) {
9171 40
      return $str;
9172
    }
9173
9174 364
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9175 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9176
    }
9177
9178 364
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9179
      self::checkForSupport();
9180
    }
9181
9182
    //
9183
    // fallback for binary || ascii only
9184
    //
9185
9186
    if (
9187 364
        $encoding === 'CP850'
9188
        ||
9189 364
        $encoding === 'ASCII'
9190
    ) {
9191 5
      return self::substr_in_byte($str, $offset, $length);
9192
    }
9193
9194
    //
9195
    // fallback via mbstring
9196
    //
9197
9198 359
    if (self::$SUPPORT['mbstring'] === true) {
9199 359
      $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9200 359
      if ($return !== false) {
9201 359
        return $return;
9202
      }
9203
    }
9204
9205
    // otherwise we need the string-length and can't fake it via "2147483647"
9206 4
    $str_length = 0;
9207 4
    if ($offset || $length === null) {
9208 4
      $str_length = self::strlen($str, $encoding);
9209
    }
9210
9211
    // e.g.: invalid chars + mbstring not installed
9212 4
    if ($str_length === false) {
9213
      return false;
9214
    }
9215
9216
    // Empty string
9217 4
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9218
      return '';
9219
    }
9220
9221
    // Impossible
9222 4
    if ($offset && $offset > $str_length) {
9223
      // "false" is the php native return type here,
9224
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9225
      return '';
9226
9227
    }
9228
9229 4
    if ($length === null) {
9230 4
      $length = (int)$str_length;
9231
    } else {
9232 2
      $length = (int)$length;
9233
    }
9234
9235
    if (
9236 4
        $encoding !== 'UTF-8'
9237
        &&
9238 4
        self::$SUPPORT['mbstring'] === false
9239
    ) {
9240 2
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9241
    }
9242
9243
    //
9244
    // fallback via intl
9245
    //
9246
9247
    if (
9248 4
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9249
        &&
9250 4
        $offset >= 0 // grapheme_substr() can't handle negative offset
9251
        &&
9252 4
        self::$SUPPORT['intl'] === true
9253
    ) {
9254
      $returnTmp = \grapheme_substr($str, $offset, $length);
9255
      if ($returnTmp !== false) {
9256
        return $returnTmp;
9257
      }
9258
    }
9259
9260
    //
9261
    // fallback via iconv
9262
    //
9263
9264
    if (
9265 4
        $length >= 0 // "iconv_substr()" can't handle negative length
9266
        &&
9267 4
        self::$SUPPORT['iconv'] === true
9268
    ) {
9269
      $returnTmp = \iconv_substr($str, $offset, $length);
9270
      if ($returnTmp !== false) {
9271
        return $returnTmp;
9272
      }
9273
    }
9274
9275
    //
9276
    // fallback for ascii only
9277
    //
9278
9279 4
    if (self::is_ascii($str)) {
9280
      return \substr($str, $offset, $length);
9281
    }
9282
9283
    //
9284
    // fallback via vanilla php
9285
    //
9286
9287
    // split to array, and remove invalid characters
9288 4
    $array = self::split($str);
9289
9290
    // extract relevant part, and join to make string again
9291 4
    return \implode('', \array_slice($array, $offset, $length));
9292
  }
9293
9294
  /**
   * Compares a character-based portion of $str1 with the equally long leading part of $str2.
   *
   * <p>If neither $offset nor $length is given, both strings are compared as a whole.</p>
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, it is passed on to
   *                                     "UTF8::substr()" for $str1.</p>
   * @param int|null $length             [optional] <p>The length of the comparison, it is passed on to
   *                                     "UTF8::substr()" for $str1.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // "strlen()" can return false; passing that on as "$length" would be a type error,
      // because "substr()" only accepts int|null there
      $str1Length = self::strlen($str1);
      if ($str1Length !== false) {
        // trim $str2 to the length of the extracted part of $str1
        $str2Tmp = self::substr($str2, 0, (int)$str1Length);
        if ($str2Tmp === false) {
          $str2Tmp = '';
        }
        $str2 = (string)$str2Tmp;
      }
      // else: the length is unknown, so $str2 is compared untrimmed
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9337
9338
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. If it is not given, the length of the haystack is used.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The number of occurrences, or false if the haystack or the needle is empty,
   *                   if the length of the haystack cannot be determined or (before PHP 7.1) if
   *                   offset + length is not positive.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the same encoding that is used for the "substr()" call below,
        // otherwise the length of a non UTF-8 haystack is calculated with the wrong charset
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window, so that the counting below needs no offset / length
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle via a UTF-8 aware regex
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9431
9432
  /**
   * Count the number of substring occurrences, processed in bytes.
   *
   * @param string   $haystack <p>
   *                           The string being checked.
   *                           </p>
   * @param string   $needle   <p>
   *                           The string being found.
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If it is not given, the search runs to the end of the
   *                           haystack.
   *                           </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string.
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {

      if ($offset || $length !== null) {

        if ($length === null) {
          // NOTE(review): "self::strlen()" presumably counts characters, not bytes - confirm that this is intended here
          $lengthTmp = self::strlen($haystack);
          if ($lengthTmp === false) {
            return false;
          }
          $length = (int)$lengthTmp;
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        // cut the requested window first, because "mb_substr_count()" takes no offset / length
        $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
        if ($haystackTmp === false) {
          $haystackTmp = '';
        }
        $haystack = (string)$haystackTmp;
      }

      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    if ($length === null) {
      // do not hand over "null" as length: with strict types the native function
      // rejects it before PHP 8.0, the 3-argument form searches up to the end of the string
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9506
9507
  /**
   * Counts how often $substring occurs in $str.
   *
   * The search is case-sensitive unless $caseSensitive is set to false; in that
   * case both strings are case-folded before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($str === '' || $substring === '') {
      return 0;
    }

    // BC fallback: a non-boolean third argument is used as the encoding
    // (presumably the old position of "$encoding" in the api)
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    $ignoreCase = !$caseSensitive;
    if ($ignoreCase) {
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    // "substr_count()" can return false, the cast turns that into 0
    $count = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$count;
  }
9537
9538
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack if it
   *                does not start with the needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // "strlen()" can return false; passing that on as "$offset" would be a type error,
      // so the haystack stays untouched if the length of the prefix is unknown
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, (int)$needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9566
9567
  /**
   * Returns a part of a string, where offset and length are counted in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters; whatever the underlying "substr()" /
   *                      "mb_substr()" call returns is passed through.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to extract: empty input or an explicit length of zero
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no length: the whole string is requested
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // a missing length is replaced by a very large one ("up to the end of the string")
    $maxLength = $length ?? 2147483647;

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \substr($str, $offset, $maxLength);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr($str, $offset, $maxLength, 'CP850'); // 8-BIT
  }
9606
9607
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string
   *                The haystack without the suffix, or the unchanged haystack if it
   *                does not end with the needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) === true) {
      // "strlen()" can return false; do not calculate with it (false would silently act as 0),
      // the haystack stays untouched if one of the lengths is unknown
      $haystackLength = self::strlen($haystack);
      $needleLength = self::strlen($needle);
      if ($haystackLength === false || $needleLength === false) {
        return $haystack;
      }

      // keep everything in front of the suffix
      $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9635
9636
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string
   *                The haystack without the prefix, or the unchanged haystack if it
   *                does not start with the needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // "strlen()" can return false; passing that on as "$offset" would be a type error,
      // so the haystack stays untouched if the length of the prefix is unknown
      $needleLength = self::strlen($needle);
      if ($needleLength === false) {
        return $haystack;
      }

      $haystackTmp = self::substr($haystack, (int)$needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9664
9665
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given and $str is a string, then it will
   *                                          default to strlen( string ); i.e. end the replacing at the end of
   *                                          string. If it is not given and $str is an array, a length of zero is
   *                                          used for every element. Of course, if length is zero then this
   *                                          function will have the effect of inserting replacement into string at
   *                                          the given start offset.</p>
   * @param string          $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset (non-integer array values fall back to 0)
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length (null becomes 0, other non-integer array values fall back to $num)
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, one per input string; the closure forwards "$encoding",
      // which would be lost (reset to the default) with a plain callable
      return \array_map(
          static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
            return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
          },
          $str,
          $replacement,
          $offset,
          $length
      );
    }

    // a single input string only needs a single replacement: use the first one
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // pure ASCII input can be handled by the native function
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $string_length = self::strlen($str, $encoding);

      // translate a negative offset into a position from the start and clamp it to the string
      if ($offset < 0) {
        $offset = \max(0, $string_length + $offset);
      } elseif ($offset > $string_length) {
        $offset = $string_length;
      }

      // translate a negative / missing length into a concrete number of characters
      if ($length < 0) {
        $length = \max(0, $string_length - $offset + $length);
      } elseif ($length === null || $length > $string_length) {
        $length = $string_length;
      }

      // never replace beyond the end of the string
      if (($offset + $length) > $string_length) {
        $length = $string_length - $offset;
      }

      // head + replacement + tail; a "false" from "substr()" is treated as an empty string
      return (string)self::substr($str, 0, $offset, $encoding) . $replacement . (string)self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
    }

    // fallback without mbstring: split both strings into characters and splice the arrays
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $lengthTmp = self::strlen($str, $encoding);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
9798
9799
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string
   *                The haystack without the suffix, or the unchanged haystack if it
   *                does not end with the needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) === true) {
      // "strlen()" can return false; do not calculate with it (false would silently act as 0),
      // the haystack stays untouched if one of the lengths is unknown
      $haystackLength = self::strlen($haystack);
      $needleLength = self::strlen($needle);
      if ($haystackLength === false || $needleLength === false) {
        return $haystack;
      }

      // keep everything in front of the suffix
      $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9827
9828
  /**
   * Swaps the case of the string: upper case becomes lower case and vice versa.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR of all three variants: where the input equals its lower-case form
    // the upper-case bytes remain, and the other way around
    // NOTE(review): a string XOR is cut to the shortest operand, so this presumably relies
    // on case mappings which keep the byte length - confirm
    return (string)($lower ^ $upper ^ $str);
  }
9855
9856
  /**
   * Checks whether "mb_strlen()" or "iconv()" is callable although the matching
   * php-extension ("mbstring" / "iconv") is not loaded, i.e. the function is
   * provided by a polyfill.
   *
   * @return bool
   *              <strong>true</strong> if at least one of the two functions exists without
   *              its extension, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    // "mb_strlen()" exists, but not via the "mbstring" extension
    $mbstringViaPolyfill = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');

    // "iconv()" exists, but not via the "iconv" extension
    $iconvViaPolyfill = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringViaPolyfill || $iconvViaPolyfill;
  }
9879
9880
  /**
   * Replaces every tab character in the string with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string The string with all tabs replaced.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    // build the replacement once, then swap it in for each tab
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9890
9891
  /**
   * Title-cases the string via "mb_convert_case()" with "MB_CASE_TITLE": the first
   * character of each word becomes uppercase, all other chars lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // no own fallback here: without the mbstring extension the symfony polyfill
    // is expected to provide "mb_convert_case()"
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9909
9910
  /**
   * Deprecated alias, it only forwards its arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>Passed through as first argument.</p>
   * @param string $subst_chr <p>Passed through as second argument.</p>
   * @param bool   $strict    <p>Passed through as third argument.</p>
   *
   * @return string The result of "UTF8::to_ascii()".
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9927
9928
  /**
   * Deprecated alias, it only forwards its argument to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>Passed through unchanged.</p>
   *
   * @return string|string[] The result of "UTF8::to_iso8859()".
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9943
9944
  /**
   * Deprecated alias, it only forwards its argument to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>Passed through unchanged.</p>
   *
   * @return string|string[] The result of "UTF8::to_latin1()".
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9959
9960
  /**
   * Deprecated alias, it only forwards its argument to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>Passed through unchanged.</p>
   *
   * @return string|string[] The result of "UTF8::to_utf8()".
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9975
9976
  /**
9977
   * Convert a string into ASCII.
9978
   *
9979
   * @param string $str     <p>The input string.</p>
9980
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9981
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9982
   *                        performance</p>
9983
   *
9984
   * @return string
9985
   */
9986 38
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9987
  {
9988 38
    static $UTF8_TO_ASCII;
9989
9990 38
    if ('' === $str) {
9991 3
      return '';
9992
    }
9993
9994
    // check if we only have ASCII, first (better performance)
9995 35
    if (self::is_ascii($str) === true) {
9996 7
      return $str;
9997
    }
9998
9999 30
    $str = self::clean(
10000 30
        $str,
10001 30
        true,
10002 30
        true,
10003 30
        true,
10004 30
        false,
10005 30
        true,
10006 30
        true
10007
    );
10008
10009
    // check again, if we only have ASCII, now ...
10010 30
    if (self::is_ascii($str) === true) {
10011 12
      return $str;
10012
    }
10013
10014 19
    if ($strict === true) {
10015
10016 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10017
        self::checkForSupport();
10018
      }
10019
10020 1
      if (self::$SUPPORT['intl'] === true) {
10021
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
10022
        /** @noinspection PhpComposerExtensionStubsInspection */
10023 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
10024
10025
        // check again, if we only have ASCII, now ...
10026 1
        if (self::is_ascii($str) === true) {
10027 1
          return $str;
10028
        }
10029
10030
      }
10031
    }
10032
10033 19
    if (self::$ORD === null) {
10034
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account_id instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10035
    }
10036
10037 19
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
10038 19
    $chars = $ar[0];
10039 19
    $ord = null;
10040 19
    foreach ($chars as &$c) {
10041
10042 19
      $ordC0 = self::$ORD[$c[0]];
10043
10044 19
      if ($ordC0 >= 0 && $ordC0 <= 127) {
10045 15
        continue;
10046
      }
10047
10048 19
      $ordC1 = self::$ORD[$c[1]];
10049
10050
      // ASCII - next please
10051 19
      if ($ordC0 >= 192 && $ordC0 <= 223) {
10052 17
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
10053
      }
10054
10055 19
      if ($ordC0 >= 224) {
10056 8
        $ordC2 = self::$ORD[$c[2]];
10057
10058 8
        if ($ordC0 <= 239) {
10059 7
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
10060
        }
10061
10062 8
        if ($ordC0 >= 240) {
10063 2
          $ordC3 = self::$ORD[$c[3]];
10064
10065 2
          if ($ordC0 <= 247) {
10066 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
10067
          }
10068
10069 2
          if ($ordC0 >= 248) {
10070
            $ordC4 = self::$ORD[$c[4]];
10071
10072
            if ($ordC0 <= 251) {
10073
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
10074
            }
10075
10076
            if ($ordC0 >= 252) {
10077
              $ordC5 = self::$ORD[$c[5]];
10078
10079
              if ($ordC0 <= 253) {
10080
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10081
              }
10082
            }
10083
          }
10084
        }
10085
      }
10086
10087 19
      if ($ordC0 === 254 || $ordC0 === 255) {
10088
        $c = $unknown;
10089
        continue;
10090
      }
10091
10092 19
      if ($ord === null) {
10093
        $c = $unknown;
10094
        continue;
10095
      }
10096
10097 19
      $bank = $ord >> 8;
10098 19
      if (!isset($UTF8_TO_ASCII[$bank])) {
10099 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10100 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10101 2
          $UTF8_TO_ASCII[$bank] = [];
10102
        }
10103
      }
10104
10105 19
      $newchar = $ord & 255;
10106
10107 19
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10108
10109
        // keep for debugging
10110
        /*
10111
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10112
        echo "char: " . $c . "\n";
10113
        echo "ord: " . $ord . "\n";
10114
        echo "newchar: " . $newchar . "\n";
10115
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10116
        echo "bank:" . $bank . "\n\n";
10117
        */
10118
10119 18
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10120
      } else {
10121
10122
        // keep for debugging missing chars
10123
        /*
10124
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10125
        echo "char: " . $c . "\n";
10126
        echo "ord: " . $ord . "\n";
10127
        echo "newchar: " . $newchar . "\n";
10128
        echo "bank:" . $bank . "\n\n";
10129
        */
10130
10131 19
        $c = $unknown;
10132
      }
10133
    }
10134
10135 19
    return \implode('', $chars);
10136
  }
10137
10138
  /**
   * Interpret an arbitrary value as a boolean.
   *
   * The value is cast to a string first. An empty string is false. The keywords
   * "true", "1", "on", "yes" (true) and "false", "0", "off", "no" (false) are
   * matched case-insensitively. Any other numeric string is true only if it is
   * greater than zero. Everything else is true unless "UTF8::trim()" reduces it
   * to a falsy string.
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;
    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keywords = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    // all table values are booleans, so "null" can only mean "unknown keyword"
    $known = $keywords[\strtolower($value)] ?? null;
    if ($known !== null) {
      return $known;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      return ((float)$value + 0) > 0;
    }

    return (bool)self::trim($value);
  }
10177
10178
  /**
   * Convert given string to safe filename (and keep string case).
   *
   * Steps: every character that is not an ASCII letter, a digit, a dot, a hyphen,
   * whitespace or the fallback char is removed; each run of whitespace becomes the
   * fallback char; runs of the fallback char are collapsed into one; finally the
   * fallback char is stripped from both ends.
   *
   * @param string $string            <p>The input string.</p>
   * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe characters
   *                                  are simply removed.</p>
   * @param string $fallback_char     <p>Replacement for whitespace; may be an empty string, in which case
   *                                  whitespace is dropped.</p>
   *
   * @return string
   */
  public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
  {
    if ($use_transliterate === true) {
      $string = self::str_transliterate($string, $fallback_char);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $patterns = [
        '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
        '/[\s]+/',                                            // 2) convert spaces to $fallback_char
    ];
    $replacements = [
        '',
        $fallback_char,
    ];

    // An empty fallback char would produce the invalid pattern "/[]+/" (preg_replace()
    // then returns null and the whole result is lost), so step 3 only runs when
    // there is something to collapse.
    if ($fallback_char !== '') {
      $patterns[] = '/[' . $fallback_char_escaped . ']+/';    // 3) remove double $fallback_char's
      $replacements[] = $fallback_char;
    }

    $string = (string)\preg_replace($patterns, $replacements, $string);

    if ($fallback_char === '') {
      return $string;
    }

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
  }
10213
10214
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively) and keep their keys.
   * An empty string is returned unchanged; everything else is passed to
   * "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) !== true) {
      $input = (string)$str;

      return $input === '' ? '' : self::utf8_decode($input);
    }

    $converted = [];
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_iso8859($value);
    }

    return $converted;
  }
10238
10239
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>Any string or array of strings.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns for the same input.</p>
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
10252
10253
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      $converted = [];
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $lead = $str[$i];

      // plain 7-bit byte: it doesn't need conversion
      if ($lead < "\x80") {
        $buf .= $lead;
        continue;
      }

      // how many continuation bytes would a UTF-8 sequence starting with this byte have?
      if ($lead >= "\xC0" && $lead <= "\xDF") {
        $expected = 1; // looks like 2 bytes UTF8
      } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
        $expected = 2; // looks like 3 bytes UTF8
      } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
        $expected = 3; // looks like 4 bytes UTF8
      } else {
        $expected = 0; // stray continuation byte or 0xF8-0xFF: always needs conversion
      }

      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0;
      for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; $offset++) {
        // bytes behind the end of the string count as "\x00", i.e. as "no continuation byte"
        $next = $i + $offset >= $max ? "\x00" : $str[$i + $offset];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already: keep the sequence and skip its continuation bytes
        $buf .= $sequence;
        $i += $expected;
      } else {
        // not valid UTF8 - convert only the lead byte, the following bytes are examined on their own
        $buf .= self::to_utf8_convert_helper($lead);
      }
    }

    // decode unicode escape sequences
    $buf = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
10362
10363
  /**
   * Convert one byte that is not part of a valid UTF-8 sequence into UTF-8.
   *
   * The byte's ordinal is looked up in the "win1252_to_utf8" table first; if there
   * is no entry, the ordinal is encoded as a two-byte UTF-8 sequence
   * (0xC0 | ord >> 6, 0x80 | ord & 0x3F).
   *
   * @param int|string $input <p>The byte to convert; it is used as key of the "ord" lookup table.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // Lazy-load the lookup tables. getData() can return false; never store that in
    // the array-typed properties, fall back to an empty table instead.
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord') ?: [];
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr') ?: [];
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8') ?: [];
    }

    // The native functions are only used if a table entry is missing.
    // NOTE(review): assumes the "ord" / "chr" tables mirror \ord() / \chr() - confirm against the data files.
    $ordC1 = self::$ORD[$input] ?? \ord((string)$input);

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Integer shift instead of "$ordC1 / 64": a float array key is truncated
    // implicitly, which is deprecated since PHP 8.1.
    $highBits = $ordC1 >> 6;

    $cc1 = (self::$CHR[$highBits] ?? \chr($highBits)) | "\xC0";
    $cc2 = ((string)$input & "\x3F") | "\x80"; // bitwise string operators, on purpose

    return $cc1 . $cc2;
  }
10396
10397
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped; if omitted (or empty), the unicode
   *                      categories "Z" (separators) and "C" (other / control) are stripped.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a perfectly valid character list, although PHP treats it as falsy;
    // only INF and the remaining falsy values mean "no list given".
    $useDefaultChars = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10426
10427
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the remainder; only the
   * first character is sent through "UTF8::strtoupper()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // "mb_strpos()" and "iconv_strpos()" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything behind the first character stays untouched ("false" means: there is nothing left)
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
10457
10458
  /**
   * alias for "UTF8::ucfirst()"
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10473
10474
  /**
   * Uppercase for all words in the string.
   *
   * Pure ASCII input without exceptions and without an extra charlist is handed
   * to PHP's native "ucwords()". Everything else is split via
   * "UTF8::str_to_words()", every piece that is not listed in $exceptions is sent
   * through "UTF8::ucfirst()" and the pieces are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // strict comparison: "0" is falsy in PHP, but it is not an empty string
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // compared against '' (instead of a bool-cast), so that a charlist / exception of "0" is not ignored
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty pieces only; a piece like "0" must survive, otherwise it would vanish from the result
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10542
10543
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a pass does not change the string
   *                             anymore.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hex html-entities, so that the entity-decoder below handles them
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($pattern, $str)) {
      $str = (string)\preg_replace($pattern, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // one pass is always done; further passes only while the string still changes
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return $str;
  }
10591
10592
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The table covers "%20" - "%FF". Keys are the percent-encoded bytes, values are
   * the matching characters as UTF-8 strings.
   *
   * NOTE(review): invisible characters (DEL, the bytes that are undefined in
   * Windows-1252, no-break space, soft hyphen) are written as escape sequences, so
   * that they cannot get lost in editors or diffs - confirm these values against
   * the previous table.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F",     // DEL
        '%80' => '€',        // 0x80 is the euro sign in Windows-1252 (was a backtick, which is %60)
        '%81' => "\u{0081}", // undefined in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => "\u{008D}", // undefined in Windows-1252
        '%8E' => 'Ž',
        '%8F' => "\u{008F}", // undefined in Windows-1252
        '%90' => "\u{0090}", // undefined in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => "\u{009D}", // undefined in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\u{00A0}", // no-break space
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\u{00AD}", // soft hyphen
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
10828
10829
  /**
10830
   * Decodes an UTF-8 string to ISO-8859-1.
10831
   *
10832
   * @param string $str <p>The input string.</p>
10833
   * @param bool   $keepUtf8Chars
10834
   *
10835
   * @return string
10836
   */
10837 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10838
  {
10839 14
    if ('' === $str) {
10840 5
      return '';
10841
    }
10842
10843 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10844 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10845
10846 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10847
10848 1
      if (self::$WIN1252_TO_UTF8 === null) {
10849
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10850
      }
10851
10852 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10852
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10853 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10853
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10854
    }
10855
10856
    /** @noinspection PhpInternalEntityUsedInspection */
10857 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10858
10859 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10860
      self::checkForSupport();
10861
    }
10862
10863
    // save for later comparision
10864 14
    $str_backup = $str;
10865 14
    $len = self::strlen_in_byte($str);
10866
10867 14
    if (self::$ORD === null) {
10868
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10869
    }
10870
10871 14
    if (self::$CHR === null) {
10872
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10873
    }
10874
10875 14
    $noCharFound = '?';
10876
    /** @noinspection ForeachInvariantsInspection */
10877 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10878 14
      switch ($str[$i] & "\xF0") {
10879 14
        case "\xC0":
10880 12
        case "\xD0":
10881 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10882 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10883 14
          break;
10884
10885
        /** @noinspection PhpMissingBreakStatementInspection */
10886 12
        case "\xF0":
10887
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10888 12
        case "\xE0":
10889 10
          $str[$j] = $noCharFound;
10890 10
          $i += 2;
10891 10
          break;
10892
10893
        default:
10894 12
          $str[$j] = $str[$i];
10895
      }
10896
    }
10897
10898 14
    $return = self::substr_in_byte($str, 0, $j);
10899 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10900
      $return = '';
10901
    }
10902
10903
    if (
10904 14
        $keepUtf8Chars === true
10905
        &&
10906 14
        self::strlen($return) >= self::strlen($str_backup)
10907
    ) {
10908 2
      return $str_backup;
10909
    }
10910
10911 14
    return $return;
10912
  }
10913
10914
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * When the encoded result contains a "\xC2" byte, the "win1252_to_utf8" table
   * is additionally applied via str_replace() (keys as search, values as replacement).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    static $search = null, $replace = null;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfilled utf8_encode() may return false
    if ($encoded === false) {
      return '';
    }

    // without a "\xC2" byte there is nothing for the table to replace
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $search = \array_keys(self::$WIN1252_TO_UTF8);
      $replace = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($search, $replace, $encoded);
  }
10954
10955
  /**
   * Deprecated alias: forwards the input to self::fix_simple_utf8() and returns its result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10968
10969
  /**
   * Returns the table of UTF-8 whitespace characters stored in self::$WHITESPACE_TABLE.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return string[]
   *                 An array with all known whitespace characters as values and the type of whitespace as keys,
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10984
10985
  /**
   * Limit the number of words in a string.
   *
   * An empty input or a limit below 1 yields an empty string. If the string does
   * not need to be shortened, it is returned unchanged (without the add-on).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Text appended in place of the stripped remainder.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ($str === '' || $limit < 1) {
      return '';
    }

    // leading whitespace followed by at most $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);
    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
11016
11017
  /**
   * Wraps a string to a given number of characters.
   *
   * The input is turned into a single-byte "mask" (one byte per element returned
   * by self::split()), the native wordwrap() is run on that mask, and the break
   * positions found in the mask are then replayed on the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string The given string wrapped at the specified column;
   *                an empty string if $str or $break is empty.
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ($str === '' || $break === '') {
      return '';
    }

    $segments = \explode($break, $str);
    $segmentCount = $segments === false ? 0 : \count($segments);

    // $chars collects the real characters (plus the original breaks); $mask mirrors it
    // with ' ' for a space, '#' for an original break and '?' for everything else.
    $chars = [];
    $mask = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($index = 0; $index < $segmentCount; ++$index) {

      if ($index !== 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$index];
      unset($segments[$index]);

      if ($segment === null) {
        continue;
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        if ($char === ' ') {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    // let the native function decide where the breaks go
    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;  // next unread entry of $chars
    $maskIndex = -1; // current position inside $mask
    $breakAt = -1;   // position of the most recently found '#'

    while (true) {
      $breakAt = self::strpos($mask, '#', $breakAt + 1);
      if ($breakAt === false) {
        break;
      }

      // copy everything up to the break position
      ++$maskIndex;
      while ($maskIndex < $breakAt) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // drop the original break / the space that the '#' stands for
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $chars);
  }
11087
11088
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * The input is split on "\r\n", "\r" and "\n"; every resulting line is wrapped
   * on its own with self::wordwrap() (so the width is not counted in bytes, as it
   * would be with PHP's native wordwrap()) and then terminated with "\n".
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width passed to self::wordwrap() for every line.</p>
   *
   * @return string The wrapped lines, each followed by "\n".
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = (array)\preg_split('/\\r\\n|\\r|\\n/', $str);

    $result = '';
    foreach ($lines as $line) {
      // preg_split() returns false on failure, which the array cast turns into [false]
      if ($line === false) {
        continue;
      }

      $result .= self::wordwrap($line, $limit);
      $result .= "\n";
    }

    return $result;
  }
11112
11113
  /**
   * Returns the Unicode White Space characters stored in self::$WHITESPACE.
   *
   * @return string[] An array with numeric code point as key and White Space Character as value.
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11122
11123
11124
}
11125