Passed
Push — master ( d0c577...7cef6e )
by Lars
03:52
created

UTF8::is_alphanumeric()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 16
  public function __construct()
  {
    // Creating an instance only makes sure the environment detection has been executed.
    UTF8::checkForSupport();
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219
   * @return string Single Multi-Byte character.
220
   */
221 2
  public static function access(string $str, int $pos): string
  {
    // Nothing can be read from an empty string or from a negative position.
    if ($pos < 0 || $str === '') {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    // The cast normalizes whatever self::substr() hands back into a string.
    return (string)$char;
  }
233
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string The output string that contains BOM.
242
   */
243 1
  public static function add_bom_to_string(string $str): string
  {
    // A string that is not reported as BOM-less is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
253
   * Adds the specified amount of left and right padding to the given string.
254
   * The default character used is a space.
255
   *
256
   * @param string $str
257
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
258
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
259
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
260
   * @param string $encoding [optional] <p>Default: UTF-8</p>
261
   *
262
   * @return string String with padding applied.
263
   */
264 25
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // NOTE: $encoding now has the documented 'UTF-8' default; a required parameter
    // after optional ones is deprecated since PHP 8.0. Existing callers that pass
    // all five arguments are unaffected.
    $strlen = self::strlen($str, $encoding);

    // Only one side requested => pad that side; otherwise (both or none) pad both sides.
    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      $type = STR_PAD_BOTH;
    }

    // The side that is not used contributes 0, so one sum covers every case.
    $length = ($left + $right) + $strlen;

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
286
   * Changes all keys in an array.
287
   *
288
   * @param array $array <p>The array to work on</p>
289
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
290
   *                     or <strong>CASE_LOWER</strong> (default)</p>
291
   *
292
   * @return string[] An array with its keys lower or uppercased.
293
   */
294 1
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Everything except CASE_UPPER (including unknown modes) means lower-casing.
    $lowerCase = ($case !== CASE_UPPER);

    $converted = [];
    foreach ($array as $originalKey => $item) {
      $newKey = $lowerCase
          ? self::strtolower($originalKey)
          : self::strtoupper($originalKey);

      // Keys that collide after conversion overwrite each other; the last one wins.
      $converted[$newKey] = $item;
    }

    return $converted;
  }
317
318
  /**
319
   * Returns the substring between $start and $end, if found, or an empty
320
   * string. An optional offset may be supplied from which to begin the
321
   * search for the start string.
322
   *
323
   * @param string $str
324
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
325
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
326
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
327
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
328
   *
329
   * @return string
330
   */
331 16
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // The wanted part begins directly behind the start delimiter.
    $from = $startPos + self::strlen($start, $encoding);
    $endPos = self::strpos($str, $end, $from, $encoding);

    // Either there is no end delimiter, or nothing sits between both delimiters.
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $between = self::substr($str, $from, $endPos - $from, $encoding);

    return $between === false ? '' : $between;
  }
356
357
  /**
358
   * Convert binary into a string.
359
   *
360
   * @param mixed $bin 1|0
361
   *
362
   * @return string
363
   */
364 1
  public static function binary_to_str($bin): string
  {
    // Only non-empty strings of bits can be decoded; everything else yields ''.
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // Ignore everything that is not a binary digit and drop leading zero bits.
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // Left-pad to whole bytes, so the first byte keeps its value.
    // (A hex string with an odd digit count would be padded on the wrong side by pack('H*').)
    $missingBits = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missingBits) . $bits;

    // Decode byte by byte: base_convert() works on floats internally and
    // silently loses precision for inputs longer than a few bytes.
    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
377
378
  /**
379
   * Returns the UTF-8 Byte Order Mark Character.
380
   *
381
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
382
   *
383
   * @return string UTF-8 Byte Order Mark
384
   */
385 2
  public static function bom(): string
  {
    // EF BB BF: the three bytes of the UTF-8 byte order mark.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
389
390
  /**
391
   * @alias of UTF8::chr_map()
392
   *
393
   * @see   UTF8::chr_map()
394
   *
395
   * @param string|array $callback
396
   * @param string       $str
397
   *
398
   * @return string[]
399
   */
400 1
  public static function callback($callback, string $str): array
  {
    // Pure alias: the work is done by self::chr_map().
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
404
405
  /**
406
   * Returns the character at $index, with indexes starting at 0.
407
   *
408
   * @param string $str
409
   * @param int    $index    <p>Position of the character.</p>
410
   * @param string $encoding [optional] <p>Default is UTF-8</p>
411
   *
412
   * @return string The character at $index.
413
   */
414 9
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // A single character starting at $index is requested from self::substr().
    $char = self::substr($str, $index, 1, $encoding);

    // The cast turns any non-string result into a string.
    return (string)$char;
  }
418
419
  /**
420
   * Returns an array consisting of the characters in the string.
421
   *
422
   * @param string $str <p>The input string.</p>
423
   *
424
   * @return string[] An array of chars.
425
   */
426 3
  public static function chars(string $str): array
  {
    // Splitting with a chunk length of 1 yields one entry per character.
    $characters = self::str_split($str, 1);

    return $characters;
  }
430
431
  /**
432
   * This method will auto-detect your server environment for UTF-8 support.
433
   *
434
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
435
   */
436 19
  public static function checkForSupport()
  {
    // The detection runs only once; later calls reuse the stored flags.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    // The transliterator id list is only filled when intl provides the function.
    if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids')) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype: http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo: http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json: http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // pcre: http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
480
481
  /**
482
   * Generates a UTF-8 encoded character from the given code point.
483
   *
484
   * INFO: opposite to UTF8::ord()
485
   *
486
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
487
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
488
   *
489
   * @return string|null Multi-Byte character, returns null on failure or empty input.
490
   */
491 10
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // Memo for already generated characters, keyed by "<code point><encoding>".
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Every target encoding besides these three is converted via mbstring below.
    $needsMbstring = $encoding !== 'UTF-8'
                     && $encoding !== 'ISO-8859-1'
                     && $encoding !== 'WINDOWS-1252';
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // use "simple"-char only until "\x80"
    $isSimpleChar = $code_point <= 127;

    if (!$isSimpleChar && self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
    } else {
      // Both remaining paths read the byte => character table; load it on first use.
      if (self::$CHR === null) {
        $chrTmp = self::getData('chr');
        if ($chrTmp) {
          self::$CHR = (array)$chrTmp;
        }
      }

      if ($isSimpleChar) {
        $chr = self::$CHR[$code_point];
      } else {
        // Manual UTF-8 encoding: lead byte followed by 10xxxxxx continuation bytes.
        $cp = (int)$code_point;
        if ($cp <= 0x7F) {
          $chr = self::$CHR[$cp];
        } elseif ($cp <= 0x7FF) {
          $chr = self::$CHR[0xC0 + ($cp >> 6)]
                 . self::$CHR[0x80 + ($cp & 0x3F)];
        } elseif ($cp <= 0xFFFF) {
          $chr = self::$CHR[0xE0 + ($cp >> 12)]
                 . self::$CHR[0x80 + (($cp >> 6) & 0x3F)]
                 . self::$CHR[0x80 + ($cp & 0x3F)];
        } else {
          $chr = self::$CHR[0xF0 + ($cp >> 18)]
                 . self::$CHR[0x80 + (($cp >> 12) & 0x3F)]
                 . self::$CHR[0x80 + (($cp >> 6) & 0x3F)]
                 . self::$CHR[0x80 + ($cp & 0x3F)];
        }
      }
    }

    // The character was built as UTF-8; convert it when another encoding was requested.
    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    $CHAR_CACHE[$cacheKey] = $chr;

    return $chr;
  }
580
581
  /**
582
   * Applies callback to all characters of a string.
583
   *
584
   * @param string|array $callback <p>The callback function.</p>
585
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
586
   *
587
   * @return string[] The outcome of callback.
588
   */
589 1
  public static function chr_map($callback, string $str): array
  {
    // The callback receives each entry produced by self::split() in turn.
    return \array_map($callback, self::split($str));
  }
595
596
  /**
597
   * Generates an array of byte length of each character of a Unicode string.
598
   *
599
   * 1 byte => U+0000  - U+007F
600
   * 2 byte => U+0080  - U+07FF
601
   * 3 byte => U+0800  - U+FFFF
602
   * 4 byte => U+10000 - U+10FFFF
603
   *
604
   * @param string $str <p>The original unicode string.</p>
605
   *
606
   * @return int[] An array of byte lengths of each character.
607
   */
608 2
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    // The overload flag read below is only filled by checkForSupport();
    // make sure the detection ran (same guard as in chr() / encode()).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    // With mbstring function overloading the plain strlen() can not be used
    // for byte counting, so the length is taken via the 8-bit 'CP850' encoding.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \array_map(
          function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
627
628
  /**
629
   * Get a decimal code representation of a specific character.
630
   *
631
   * @param string $char <p>The input character.</p>
632
   *
633
   * @return int
634
   */
635 2
  public static function chr_to_decimal(string $char): int
  {
    // An empty string has no leading byte; avoid reading an uninitialized string offset.
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // The leading byte announces the sequence length; strip its marker bits.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // A truncated sequence contributes zero bits instead of triggering
      // an out-of-range string offset access.
      $payload = isset($char[$i - 1]) ? (self::ord($char[$i - 1]) & ~0x80) : 0;
      $code = ($code << 6) + $payload;
    }

    return $code;
  }
666
667
  /**
668
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
669
   *
670
   * @param string $char <p>The input character</p>
671
   * @param string $pfix [optional]
672
   *
673
   * @return string The code point encoded as U+xxxx
674
   */
675 1
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // The NUL entity is replaced by an empty string before the code point lookup.
    $input = ($char === '&#0;') ? '' : $char;
    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
687
688
  /**
689
   * alias for "UTF8::chr_to_decimal()"
690
   *
691
   * @see UTF8::chr_to_decimal()
692
   *
693
   * @param string $chr
694
   *
695
   * @return int
696
   */
697 1
  public static function chr_to_int(string $chr): int
  {
    // Pure alias: the work is done by self::chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
701
702
  /**
703
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
704
   *
705
   * @param string $body     <p>The original string to be split.</p>
706
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
707
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
708
   *
709
   * @return string The chunked string.
710
   */
711 2
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    // $end is placed between the chunks only (implode adds no trailing separator).
    return \implode($end, $chunks);
  }
715
716
  /**
717
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
718
   *
719
   * @param string $str                           <p>The string to be sanitized.</p>
720
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
721
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
722
   *                                              whitespace.</p>
723
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
724
   *                                              e.g.: "…"
725
   *                                              => "..."</p>
726
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
727
   *                                              combination with
728
   *                                              $normalize_whitespace</p>
729
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
730
   *                                              mark e.g.: "�"</p>
731
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
732
   *                                              characters e.g.: "\0"</p>
733
   *
734
   * @return string Clean UTF-8 encoded string.
735
   */
736 81
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; stray bytes land in the
    // other groups and vanish, because only "$1" is written back.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($utf8Pattern, '$1', $str);

    // The optional steps below run in a fixed order.
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
784
785
  /**
786
   * Clean up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
787
   *
788
   * @param string $str <p>The input string.</p>
789
   *
790
   * @return string
791
   */
792 25
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // Flags for self::clean(), in positional order:
    //   remove BOM                          => true
    //   normalize whitespace                => true
    //   normalize MS Word chars             => false
    //   keep non-breaking spaces            => true
    //   remove diamond question mark (�)    => true
    //   remove invisible chars (e.g. "\0")  => true
    return self::clean($fixed, true, true, false, true, true, true);
  }
818
819
  /**
820
   * Accepts a string or a array of strings and returns an array of Unicode code points.
821
   *
822
   * INFO: opposite to UTF8::string()
823
   *
824
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
825
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
826
   *                                    default, code points will be returned as integers.</p>
827
   *
828
   * @return array<int|string>
829
   *                           The array of code points:<br>
830
   *                           array<int> for $u_style === false<br>
831
   *                           array<string> for $u_style === true<br>
832
   */
833 7
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A plain string is first broken up via self::split(); arrays are used as given.
    $chars = (\is_string($arg) === true) ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if (\count($codePoints) === 0) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    // $u_style: every code point goes through self::int_to_hex().
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
863
864
  /**
865
   * Trims the string and replaces consecutive whitespace characters with a
866
   * single space. This includes tabs and newline characters, as well as
867
   * multibyte whitespace such as the thin space and ideographic space.
868
   *
869
   * @param string $str <p>The input string.</p>
870
   *
871
   * @return string String with a trimmed $str and condensed whitespace.
872
   */
873 13
  public static function collapse_whitespace(string $str): string
  {
    // Step 1: every run of whitespace becomes one ordinary space.
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    // Step 2: trim the result.
    return self::trim($condensed);
  }
879
880
  /**
881
   * Returns count of characters used in a string.
882
   *
883
   * @param string $str       <p>The input string.</p>
884
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
885
   *
886
   * @return int[] An associative array of Character as keys and
887
   *               their count as values.
888
   */
889 9
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    // Character => number of occurrences.
    return \array_count_values($characters);
  }
893
894
  /**
895
   * Remove css media-queries.
896
   *
897
   * @param string $str
898
   *
899
   * @return string
900
   */
901 1
  public static function css_stripe_media_queries(string $str): string
  {
    // Matches an "@media ..." rule including its nested block (ungreedy, dot matches newlines).
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    // preg_replace() yields null on failure; the cast turns that into ''.
    return (string)$stripped;
  }
909
910
  /**
911
   * Checks whether ctype is available on the server.
912
   *
913
   * @return bool
914
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
915
   */
916
  public static function ctype_loaded(): bool
  {
    // Asks PHP whether the "ctype" extension is loaded.
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
920
921
  /**
922
   * Converts a int-value into an UTF-8 character.
923
   *
924
   * @param mixed $int
925
   *
926
   * @return string
927
   */
928 5
  public static function decimal_to_chr($int): string
  {
    // Build the numeric entity ("&#<int>;") and let the entity decoder resolve it.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
932
933
  /**
934
   * Encode a string with a new charset-encoding.
935
   *
936
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
937
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
938
   *
939
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
940
   * @param string $str      <p>The input string</p>
941
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
942
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
943
   *
944
   * @return string
945
   */
946 14
  public static function encode(string $encoding, string $str, bool $force = true): string
947
  {
948 14
    if ('' === $str || '' === $encoding) {
949 6
      return $str;
950
    }
951
952 14
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
953 2
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
954
    }
955
956 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
957
      self::checkForSupport();
958
    }
959
960 14
    $encodingDetected = self::str_detect_encoding($str);
961
962
    // DEBUG
963
    //var_dump($encoding, $encodingDetected, $str, "\n\n");
964
965
    if (
966 14
        $force === true
967
        ||
968
        (
969 7
            $encodingDetected !== false
970
            &&
971 14
            $encodingDetected !== $encoding
972
        )
973
    ) {
974
975
      if (
976 14
          $encoding === 'UTF-8'
977
          &&
978
          (
979 14
              $force === true
980 5
              || $encodingDetected === 'UTF-8'
981 5
              || $encodingDetected === 'WINDOWS-1252'
982 14
              || $encodingDetected === 'ISO-8859-1'
983
          )
984
      ) {
985 13
        return self::to_utf8($str);
986
      }
987
988
      if (
989 4
          $encoding === 'ISO-8859-1'
990
          &&
991
          (
992 2
              $force === true
993 2
              || $encodingDetected === 'ISO-8859-1'
994 2
              || $encodingDetected === 'WINDOWS-1252'
995 4
              || $encodingDetected === 'UTF-8'
996
          )
997
      ) {
998 2
        return self::to_iso8859($str);
999
      }
1000
1001
      if (
1002 3
          $encoding !== 'UTF-8'
1003
          &&
1004 3
          $encoding !== 'ISO-8859-1'
1005
          &&
1006 3
          $encoding !== 'WINDOWS-1252'
1007
          &&
1008 3
          self::$SUPPORT['mbstring'] === false
1009
      ) {
1010
        \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
1011
      }
1012
1013 3
      $strEncoded = \mb_convert_encoding(
1014 3
          $str,
1015 3
          $encoding,
1016 3
          ($force === true ? $encoding : $encodingDetected)
0 ignored issues
show
Bug introduced by
It seems like $force === true ? $encoding : $encodingDetected can also be of type false; however, parameter $from_encoding of mb_convert_encoding() does only seem to accept string|string[], maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

1016
          /** @scrutinizer ignore-type */ ($force === true ? $encoding : $encodingDetected)
Loading history...
1017
      );
1018
1019 3
      if ($strEncoded) {
1020 3
        return $strEncoded;
1021
      }
1022
    }
1023
1024 4
    return $str;
1025
  }
1026
1027
  /**
   * Creates an extract from a sentence; if the search-string is found, the extract tries to center on it.
   *
   * Skipped text at the start and / or end of the extract is marked with $replacerForSkippedText, and the
   * cut edges are trimmed of whitespace and common punctuation.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string. An empty string ('') means "no search":
   *                                         the extract is then taken from the start of $str.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters that are removed from the cut edges of an extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // Compared strictly on purpose: empty('0') is true, which made it impossible to search for "0".
    if ('' === $search) {
      $strLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $offset = ($length - 1) > $strLength ? $strLength : ($length - 1);
      } else {
        $offset = 0;
      }

      // NOTE(review): if self::strpos() returns false for a missing delimiter (as the native function does),
      // min() selects that false and the cast turns it into 0, so the whole string is returned — confirm
      // that this is intended.
      $cutPos = (int)\min(
          self::strpos($str, ' ', $offset, $encoding),
          self::strpos($str, '.', $offset, $encoding)
      );

      if (!$cutPos) {
        return $str;
      }

      $head = self::substr($str, 0, $cutPos, $encoding);
      if ($head === false) {
        return '';
      }

      return \rtrim($head, $trimChars) . $replacerForSkippedText;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);

    // Approximate start of a window of $length chars that has the search-string in its middle.
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // Move the window start back to the last space / dot before it.
    $posStart = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $posStart = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    $strLength = self::strlen($str, $encoding);

    // The loose check is kept as-is: a falsy $wordPos (false, but also a match at offset 0) takes the
    // "extract from the start" branch below.
    if ($wordPos && $halfSide > 0) {
      $offset = $posStart + $length - 1;
      if ($offset > $strLength) {
        $offset = $strLength;
      }

      $posEnd = (int)\min(
              self::strpos($str, ' ', $offset, $encoding),
              self::strpos($str, '.', $offset, $encoding)
          ) - $posStart;

      if ($posEnd <= 0) {
        // No usable end position: take everything from the window start to the end of the string.
        // The length limit uses the same $encoding as the substr() call (it was computed without it before).
        $tail = self::substr($str, $posStart, $strLength, $encoding);
        if ($tail === false) {
          return '';
        }

        return $replacerForSkippedText . \ltrim($tail, $trimChars);
      }

      $middle = self::substr($str, $posStart, $posEnd, $encoding);
      if ($middle === false) {
        return '';
      }

      return $replacerForSkippedText . \trim($middle, $trimChars) . $replacerForSkippedText;
    }

    $offset = $length - 1;
    if ($offset > $strLength) {
      $offset = $strLength;
    }

    $posEnd = \min(
        self::strpos($str, ' ', $offset, $encoding),
        self::strpos($str, '.', $offset, $encoding)
    );

    if (!$posEnd) {
      return $str;
    }

    $head = self::substr($str, 0, $posEnd, $encoding);
    if ($head === false) {
      return '';
    }

    return \rtrim($head, $trimChars) . $replacerForSkippedText;
  }
1148
1149
  /**
   * Reads an entire file into a string and (optionally) converts the content to UTF-8.
   *
   * WARNING: do not use the UTF-8 option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>Name of the file to read.</p>
   * @param bool          $use_include_path [optional] <p>Passed through to \file_get_contents() as its
   *                                        include-path argument.</p>
   * @param resource|null $context          [optional] <p>A valid context resource created with
   *                                        stream_context_create. If it is null and $timeout is non-zero,
   *                                        a context with the "http" timeout set to $timeout is created.</p>
   * @param int|null      $offset           [optional] <p>The offset where the reading starts
   *                                        (null is treated as 0).</p>
   * @param int|null      $maxLength        [optional] <p>Maximum length of data read. The default is to read
   *                                        until end of file is reached.</p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated in newer PHP versions — confirm against the
    // PHP versions this file has to support.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a context for the timeout if the caller did not provide one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $readArgs = [$filename, $use_include_path, $context, $offset ?? 0];

    // The length argument is only handed over if an explicit limit was given.
    if (\is_int($maxLength) === true) {
      $readArgs[] = $maxLength;
    }

    $data = \file_get_contents(...$readArgs);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    $isConvertible = self::is_binary($data, true) !== true
                     || self::is_utf16($data) !== false
                     || self::is_utf32($data) !== false;

    if ($isConvertible) {
      $data = self::cleanup(self::encode('UTF-8', $data, false));
    }

    return $data;
  }
1240
1241
  /**
   * Checks whether the content of a file starts with a BOM (Byte Order Mark).
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $rawContent = \file_get_contents($file_path);

    if ($rawContent !== false) {
      return self::string_has_bom($rawContent);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1260
1261
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * - arrays and objects are processed recursively (every element / property is filtered)
   * - strings get their line endings normalized if they contain "\r"; non-ASCII strings are then
   *   normalized via \Normalizer, or re-encoded via UTF8::encode() if the normalization gives no result
   * - every other type is returned unchanged
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form passed to \Normalizer (the default 4 is NFC).</p>
   * @param string $leading_combining  <p>Prefix that is added if the string starts with a combining mark
   *                                   (\p{Mn}); an empty string disables this.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty placeholder, so that the isset() check below passes for already normalized input.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized string if there is one, otherwise fall back to a forced re-encoding.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1324
1325
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name, optionally filters it and passes the result
   * through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>Name of a variable to get.</p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              It is only handed to \filter_input() if it was passed explicitly.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward $options if the caller actually supplied a fourth argument.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1363
1364
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables, optionally filters them and passes the result through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are "filter" (the filter type), "flags" (any flags that apply
   *                          to the filter) and "options" (any options that apply to the filter).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
   *               set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // $definition and $add_empty are only forwarded if the caller passed more than $type.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1409
1410
  /**
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Filters a variable with a specified filter and passes the result through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>Value to filter.</p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        It is only handed to \filter_var() if it was passed explicitly.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filter that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // callback filter
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward $options if the caller actually supplied a third argument.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1478
1479
  /**
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets multiple variables, optionally filters them and passes the result through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter, flags and options.
   *                          If the value is an array, valid keys are "filter" (the filter type),
   *                          "flags" (any flags that apply to the filter) and "options" (any options that
   *                          apply to the filter).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // $definition and $add_empty are only forwarded if the caller passed more than $data.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1521
1522
  /**
   * Reports whether the "finfo" class exists.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function finfo_loaded(): bool
  {
    $hasFinfo = \class_exists('finfo');

    return $hasFinfo;
  }
1532
1533
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the start; values <= 0 give ''.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string The leading characters, or '' if the substring could not be determined.
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    $head = self::substr($str, 0, $n, $encoding);

    // self::substr() signals failure with false; callers of this method always get a string.
    return $head === false ? '' : $head;
  }
1555
1556
  /**
   * Checks that the string has no more unicode characters than the specified integer.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The size in number of chars to be checked against string.
   *
   * @return bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1568
1569
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The fix is a plain search / replace with the "utf8_fix" data set: its keys are searched and
   * replaced by its values.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replace lists, built once and then reused by later calls.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      // Load the data set only if the shared property has not been filled yet.
      // NOTE(review): static analysis flagged that self::getData() may return false, which array_keys()
      // and array_values() do not accept — confirm whether that case can occur and how it should be handled.
      self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

      $searchCache = \array_keys(self::$BROKEN_UTF8_FIX);
      $replaceCache = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
1603
1604
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively), keys are preserved.
   * Strings are decoded / re-encoded repeatedly until the result does not change anymore.
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      $fixed = [];
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $str = (string)$str;

    // nothing to do for an empty string
    if ($str === '') {
      return $str;
    }

    // repeat the decode / encode round-trip until we reach a fixed point
    do {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    } while ($previous !== $str);

    return $str;
  }
1634
1635
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" is flagged as available in the support-array, its bidi class is used first;
   * when that class is neither in the LTR nor in the RTL list below, or "IntlChar" is not
   * available, the code point is looked up in a static table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $bidiClass = \IntlChar::charDirection($char);

      // values returned by "IntlChar::charDirection()", grouped by resulting direction
      if (\in_array($bidiClass, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($bidiClass, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    // "self::" instead of "static::": the class is final, so late static binding is never needed
    $c = self::chr_to_decimal($char);

    // every right-to-left code point of the table lies between U+05BE and U+10B7F
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // lower block: U+05BE - U+085E
      $singles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6, 0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa, 0x81a, 0x824, 0x828, 0x85e,
      ];
      $ranges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      // single code point U+200F between the two blocks
      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // upper block: U+FB1D - U+10B7F
      $singles = [
          0xfb1d, 0xfb3e, 0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $ranges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // gap between the lower and the upper block (except U+200F, handled above)
      return 'LTR';

    }

    if (\in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      list($from, $to) = $range;

      if ($from <= $c && $to >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1757
1758
  /**
   * Load a data-file from "/data/*.php" (relative to this file) and return its return-value.
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return mixed|false Will return false on error (the file does not exist).
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (\file_exists($path) === false) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1775
1776
  /**
   * Check for php-support.
   *
   * The support-"array" is initialized via "checkForSupport()" on first use.
   *
   * @param string|null $key
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null) entries are reported as null
    return self::$SUPPORT[$key] ?? null;
  }
1802
1803
  /**
   * Create a random string out of the given characters.
   *
   * An empty string is returned if $possibleChars contains no characters.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $charCount = self::strlen($possibleChars, $encoding);
    if ($charCount === 0) {
      return '';
    }

    $result = '';
    for ($n = 0; $n < $length; ++$n) {

      try {
        $index = \random_int(0, $charCount - 1);
      } catch (\Exception $e) {
        // fallback, if "random_int()" throws an exception
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $charCount - 1);
      }

      // pick one (multi-byte safe) character at the random position
      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1836
1837
  /**
   * Create a unique string based on "uniqid()" with some extra entropy
   * (random number, session-id, remote / server address and $entropyExtra).
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $entropy = \implode(
        '',
        [
            \mt_rand(),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ]
    );

    $unique = \uniqid($entropy, true);

    if ($md5 === false) {
      return $unique;
    }

    return \md5($unique . $entropy);
  }
1859
1860
  /**
   * Alias of "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of "UTF8::string_has_bom()".
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1875
1876
  /**
   * Check if the string contains at least one lower case character.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a "[[:lower:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
1887
1888
  /**
   * Check if the string contains at least one upper case character.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a "[[:upper:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
1899
1900
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted via "hexdec()" and then passed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    $decimal = \hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
1911
1912
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted notations (case-insensitive): "\uXXXX", "U+XXXX" or the plain hexadecimal digits,
   * each with 4 to 6 hexadecimal digits.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int(string $hexDec)
  {
    if ('' === $hexDec) {
      return false;
    }

    // only real hexadecimal digits are allowed: "[a-z0-9]" would accept e.g. "zzzz",
    // which "intval()" silently converts into 0 instead of reporting a failure
    if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1933
1934
  /**
   * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
   * @param string $encoding [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1949
1950
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" exists it is used directly, otherwise every character
   * is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // start at 0x80, if the ASCII chars (0x00 - 0x7f) must not be encoded
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap consists of groups of exactly four integers: [start, end, offset, mask].
      // A fifth element makes the map invalid and PHP >= 8.0 rejects it with a "ValueError".
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without "mbstring": encode char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1996
1997
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Convert all HTML entities to their applicable characters. The decoding is repeated
   * until the string does not change anymore, so multiple encoded entities are decoded too.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>
   *                         The input string.
   *                         </p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
   *                         which document type to use. If null is given, "ENT_QUOTES | ENT_HTML5" is used.
   *                         <ul>
   *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                         </ul>
   *                         </p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string The decoded string.
   */
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // fast exit: without "&" (or without "&#" and ";") there is nothing to decode
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // make sure that the support-array is initialized, before we read from it
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    do {
      $str_compare = $str;

      // decode decimal entities, but keep encoded quotes: they are handled by "$flags" below
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // add the missing semicolon to numeric entities,
      // "preg_replace()" returns null on error: keep the current string in this case
      $strWithSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str) ?? $str;

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $strWithSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2136
2137
  /**
   * Create a escape html version of the string via "UTF8::htmlspecialchars()",
   * using the flags "ENT_QUOTES | ENT_SUBSTITUTE".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2153
2154
  /**
   * Remove empty html-tags (tags without content or with whitespace-only content).
   *
   * e.g.: <tag></tag>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    // opening tag, optional whitespace, closing tag
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2171
2172
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * After the native "htmlentities()" call, backslashes are replaced by "&#92;" and the
   * remaining non-ASCII characters are converted via "UTF8::html_encode()".
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>
   *                              The input string.
   *                              </p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type. The default is
   *                              ENT_COMPAT.
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
   *                              returning an empty string. Using this flag is discouraged as it may have security
   *                              implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
   *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning
   *                              an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document type
   *                              with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise)
   *                              instead of leaving them as is. This may be useful, for instance, to ensure the
   *                              well-formedness of XML documents with embedded external content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Like <b>htmlspecialchars</b>,
   *                              <b>htmlentities</b> takes an optional third argument
   *                              <i>encoding</i> which defines encoding used in
   *                              conversion.
   *                              Although this argument is technically optional, you are highly
   *                              encouraged to specify the correct value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string.
   *                </p>
   *                <p>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP doesn't replace a backslash to its html entity since this is something
     * that's mostly used to escape characters when inserting in a database. Since
     * a database layer takes care of the escaping, the backslashes are replaced
     * by their html entity equivalent here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // convert the remaining non-ASCII chars into numbered entities
    return self::html_encode($encoded, true, $encoding);
  }
2295
2296
  /**
2297
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2298
   *
2299
   * INFO: Take a look at "UTF8::htmlentities()"
2300
   *
2301
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2302
   *
2303
   * @param string $str           <p>
2304
   *                              The string being converted.
2305
   *                              </p>
2306
   * @param int    $flags         [optional] <p>
2307
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2308
   *                              invalid code unit sequences and the used document type. The default is
2309
   *                              ENT_COMPAT | ENT_HTML401.
2310
   *                              <table>
2311
   *                              Available <i>flags</i> constants
2312
   *                              <tr valign="top">
2313
   *                              <td>Constant Name</td>
2314
   *                              <td>Description</td>
2315
   *                              </tr>
2316
   *                              <tr valign="top">
2317
   *                              <td><b>ENT_COMPAT</b></td>
2318
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2319
   *                              </tr>
2320
   *                              <tr valign="top">
2321
   *                              <td><b>ENT_QUOTES</b></td>
2322
   *                              <td>Will convert both double and single quotes.</td>
2323
   *                              </tr>
2324
   *                              <tr valign="top">
2325
   *                              <td><b>ENT_NOQUOTES</b></td>
2326
   *                              <td>Will leave both double and single quotes unconverted.</td>
2327
   *                              </tr>
2328
   *                              <tr valign="top">
2329
   *                              <td><b>ENT_IGNORE</b></td>
2330
   *                              <td>
2331
   *                              Silently discard invalid code unit sequences instead of returning
2332
   *                              an empty string. Using this flag is discouraged as it
2333
   *                              may have security implications.
2334
   *                              </td>
2335
   *                              </tr>
2336
   *                              <tr valign="top">
2337
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2338
   *                              <td>
2339
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2340
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2341
   *                              </td>
2342
   *                              </tr>
2343
   *                              <tr valign="top">
2344
   *                              <td><b>ENT_DISALLOWED</b></td>
2345
   *                              <td>
2346
   *                              Replace invalid code points for the given document type with a
2347
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2348
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2349
   *                              instance, to ensure the well-formedness of XML documents with
2350
   *                              embedded external content.
2351
   *                              </td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_HTML401</b></td>
2355
   *                              <td>
2356
   *                              Handle code as HTML 4.01.
2357
   *                              </td>
2358
   *                              </tr>
2359
   *                              <tr valign="top">
2360
   *                              <td><b>ENT_XML1</b></td>
2361
   *                              <td>
2362
   *                              Handle code as XML 1.
2363
   *                              </td>
2364
   *                              </tr>
2365
   *                              <tr valign="top">
2366
   *                              <td><b>ENT_XHTML</b></td>
2367
   *                              <td>
2368
   *                              Handle code as XHTML.
2369
   *                              </td>
2370
   *                              </tr>
2371
   *                              <tr valign="top">
2372
   *                              <td><b>ENT_HTML5</b></td>
2373
   *                              <td>
2374
   *                              Handle code as HTML 5.
2375
   *                              </td>
2376
   *                              </tr>
2377
   *                              </table>
2378
   *                              </p>
2379
   * @param string $encoding      [optional] <p>
2380
   *                              Defines encoding used in conversion.
2381
   *                              </p>
2382
   *                              <p>
2383
   *                              For the purposes of this function, the encodings
2384
   *                              ISO-8859-1, ISO-8859-15,
2385
   *                              UTF-8, cp866,
2386
   *                              cp1251, cp1252, and
2387
   *                              KOI8-R are effectively equivalent, provided the
2388
   *                              <i>string</i> itself is valid for the encoding, as
2389
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2390
   *                              the same positions in all of these encodings.
2391
   *                              </p>
2392
   * @param bool   $double_encode [optional] <p>
2393
   *                              When <i>double_encode</i> is turned off PHP will not
2394
   *                              encode existing html entities, the default is to convert everything.
2395
   *                              </p>
2396
   *
2397
   * @return string The converted string.
2398
   * </p>
2399
   * <p>
2400
   * If the input <i>string</i> contains an invalid code unit
2401
   * sequence within the given <i>encoding</i> an empty string
2402
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2403
   * <b>ENT_SUBSTITUTE</b> flags are set.
2404
   */
2405 7
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are handed to PHP untouched; every other label is
    // first run through normalize_encoding() (second argument 'UTF-8' is
    // presumably the fallback label -- confirm against normalize_encoding()).
    $passThrough = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$passThrough) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2413
2414
  /**
   * Reports whether the "iconv" PHP extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean.
    return \extension_loaded('iconv');
  }
2424
2425
  /**
   * Alias for "UTF8::decimal_to_chr()": the argument is forwarded unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed to "UTF8::decimal_to_chr()".</p>
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    return self::decimal_to_chr($int);
  }
2438
2439
  /**
   * Formats an integer as a hexadecimal code point string such as "U+00f1".
   *
   * The hexadecimal digits come from dechex() (lower case) and are left-padded
   * with zeros to at least four digits; longer values are kept in full.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Text placed in front of the digits, "U+" by default.</p>
   *
   * @return string The prefix followed by the zero-padded hexadecimal digits.
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2457
2458
  /**
   * Reports whether the "IntlChar" class exists on this PHP installation.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    // ::class only builds the name string 'IntlChar'; it does not load the class.
    return \class_exists(\IntlChar::class);
  }
2468
2469
  /**
   * Reports whether the "intl" PHP extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    return \extension_loaded('intl');
  }
2479
2480
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool The result of "UTF8::is_ascii()" for $str.
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    return self::is_ascii($str);
  }
2495
2496
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of "UTF8::is_base64()" for $str.
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64(string $str): bool
  {
    return self::is_base64($str);
  }
2511
2512
  /**
   * Deprecated camelCase alias that forwards both arguments to "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str    <p>The value to inspect.</p>
   * @param bool  $strict <p>Passed through as the $strict flag of "UTF8::is_binary()".</p>
   *
   * @return bool The result of "UTF8::is_binary()".
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    return self::is_binary($str, $strict);
  }
2528
2529
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string to compare against the known byte order marks.</p>
   *
   * @return bool The result of "UTF8::is_bom()" for $utf8_chr.
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    return self::is_bom($utf8_chr);
  }
2544
2545
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of "UTF8::is_html()" for $str.
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    return self::is_html($str);
  }
2560
2561
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool The result of "UTF8::is_json()" for $str.
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    return self::is_json($str);
  }
2576
2577
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16(string $str)
  {
    return self::is_utf16($str);
  }
2595
2596
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32(string $str)
  {
    return self::is_utf32($str);
  }
2614
2615
  /**
   * Deprecated camelCase alias that forwards both arguments to "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through as the $strict flag of "UTF8::is_utf8()".</p>
   *
   * @return bool The result of "UTF8::is_utf8()".
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    return self::is_utf8($str, $strict);
  }
2631
2632
  /**
   * Tests the string against the POSIX class [[:alpha:]] via
   * "UTF8::str_matches_pattern()".
   *
   * NOTE(review): the pattern uses "*", so an empty string should also match,
   * assuming str_matches_pattern() applies the pattern as written.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphabetic chars.
   */
  public static function is_alpha(string $str): bool
  {
    $onlyAlphabetic = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $onlyAlphabetic);
  }
2644
2645
  /**
   * Tests the string against the POSIX class [[:alnum:]] via
   * "UTF8::str_matches_pattern()".
   *
   * NOTE(review): the pattern uses "*", so an empty string should also match,
   * assuming str_matches_pattern() applies the pattern as written.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphanumeric chars.
   */
  public static function is_alphanumeric(string $str): bool
  {
    $onlyAlphanumeric = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $onlyAlphanumeric);
  }
2657
2658
  /**
   * Checks whether a string consists only of the accepted 7-bit ASCII bytes.
   *
   * Accepted bytes are 0x20-0x7E plus 0x09, 0x0A, 0x0D, 0x10 and 0x13; any
   * other byte makes the check fail. An empty string counts as ASCII.
   *
   * NOTE(review): 0x10 (DLE) and 0x13 (DC3) are accepted while other control
   * bytes are not -- this looks like decimal 10/13 written as hex; confirm
   * before changing, since callers may rely on the current result.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // Look for any byte outside the accepted set.
    $hasForeignByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForeignByte;
  }
2676
2677
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The string is decoded in strict mode and re-encoded; it only counts as
   * base64 when the round trip reproduces the input exactly. An empty input,
   * or an input that decodes to an empty string, is reported as not base64.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is base64 encoded.
   */
  public static function is_base64(string $str): bool
  {
    $decoded = \base64_decode($str, true);

    // Compare explicitly instead of relying on truthiness: a payload of "0"
    // (e.g. the input "MA==") is falsy in PHP but is perfectly valid data.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
2690
2691
  /**
   * Heuristically decides whether the input looks like binary data.
   *
   * The input is cast to string and checked in this order:
   * 1. an empty string is never binary;
   * 2. a string made only of "0" and "1" characters is binary;
   * 3. a string in which more than 30% of the bytes are NUL is binary;
   * 4. in strict mode, ext-fileinfo decides (MIME encoding "binary");
   *    otherwise any NUL byte at all makes the input binary.
   *
   * @param mixed $input  <p>The value to inspect; it is cast to string first.</p>
   * @param bool  $strict <p>Use ext-fileinfo for the final decision.</p>
   *
   * @throws \RuntimeException in strict mode when ext-fileinfo is reported as missing
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // textual bit strings such as "010011"
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // $input is non-empty here, so the division is safe
    $nullBytes = \substr_count($input, "\x0");
    if (($nullBytes / \strlen($input)) > 0.3) {
      return true;
    }

    if ($strict !== true) {
      return $nullBytes > 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(FILEINFO_MIME_ENCODING);

    return $detector->buffer($input) === 'binary';
  }
2744
2745
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and runs them through
   * "UTF8::is_binary()" in strict mode. A file that cannot be opened, or whose
   * first read yields an empty string, is reported as not binary.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');
    if (!\is_resource($handle)) {
      return false;
    }

    $head = \fread($handle, 512);
    \fclose($handle);

    if ($head === '') {
      return false;
    }

    return self::is_binary($head, true);
  }
2769
2770
  /**
   * Tests the string against the POSIX class [[:space:]] via
   * "UTF8::str_matches_pattern()".
   *
   * NOTE(review): the pattern uses "*", so an empty string should also match,
   * assuming str_matches_pattern() applies the pattern as written.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only whitespace characters.
   */
  public static function is_blank(string $str): bool
  {
    $onlyWhitespace = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $onlyWhitespace);
  }
2782
2783
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * The whole string is compared strictly against every key of the internal
   * BOM table, so it must be the BOM itself and nothing more.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    // The BOM byte sequences are the keys of the table; the values are lengths.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2803
2804
  /**
   * Determine whether the value is considered to be empty.
   *
   * A value is considered empty when it converts to boolean FALSE, e.g. "",
   * "0", 0, null, false or an empty array.
   *
   * @param mixed $str <p>The value to test.</p>
   *
   * @return bool Whether or not $str is empty().
   */
  public static function is_empty($str): bool
  {
    // For a parameter that is always defined, empty() is the same as a
    // negated boolean conversion.
    return !$str;
  }
2818
2819
  /**
   * Tests the string against the POSIX class [[:xdigit:]] via
   * "UTF8::str_matches_pattern()".
   *
   * NOTE(review): the pattern uses "*", so an empty string should also match,
   * assuming str_matches_pattern() applies the pattern as written.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only hexadecimal chars.
   */
  public static function is_hexadecimal(string $str): bool
  {
    $onlyHexDigits = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $onlyHexDigits);
  }
2831
2832
  /**
   * Check if the string contains at least one html-tag, e.g. "<b>", "</b>" or "<br />".
   *
   * An empty string never contains a tag.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Opening, closing or self-closing tag, optionally with attributes.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // preg_match() yields 1 only when a tag was found (0 = none, false = error).
    return \preg_match($tagPattern, $str) === 1;
  }
2852
2853
  /**
   * Try to check if "$str" is a json-string.
   *
   * The string is decoded with "UTF8::json_decode()"; it counts as JSON only
   * when the decoded value is an array or an object and no JSON error was
   * recorded. An empty string is never JSON.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException when ext-json is reported as missing
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);
    $isContainer = \is_object($decoded) === true || \is_array($decoded) === true;

    /** @noinspection PhpComposerExtensionStubsInspection */
    return $isContainer && \json_last_error() === JSON_ERROR_NONE;
  }
2885
2886
  /**
   * Tests the string against the POSIX class [[:lower:]] via
   * "UTF8::str_matches_pattern()".
   *
   * NOTE(review): the pattern uses "*", so an empty string should also match,
   * assuming str_matches_pattern() applies the pattern as written.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only lower case characters.
   */
  public static function is_lowercase(string $str): bool
  {
    $onlyLowercase = '^[[:lower:]]*$';

    return self::str_matches_pattern($str, $onlyLowercase);
  }
2899
2900
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The check works by actually unserializing the string. Object
   * instantiation is disabled while doing so, so checking an untrusted string
   * cannot create objects of arbitrary classes or trigger their magic methods.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized.
   */
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // A serialized "false" unserializes to false, which is indistinguishable
    // from the failure value, so it needs its own check.
    if ($str === 'b:0;') {
      return true;
    }

    // "allowed_classes => false" turns serialized objects into
    // __PHP_Incomplete_Class instead of instantiating them. The "@" is kept
    // because unserialize() emits a notice/warning for malformed input, which
    // is the expected case here.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2919
2920
  /**
   * Tests the string against the POSIX class [[:upper:]] via
   * "UTF8::str_matches_pattern()".
   *
   * NOTE(review): the pattern uses "*", so an empty string should also match,
   * assuming str_matches_pattern() applies the pattern as written.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    $onlyUppercase = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $onlyUppercase);
  }
2933
2934
  /**
   * Check if the string is UTF-16.
   *
   * Only strings that "UTF8::is_binary()" reports as binary are examined.
   * After BOM removal the string is interpreted as UTF-16LE and as UTF-16BE:
   * each interpretation is converted to UTF-8 and round-tripped, and when the
   * round trip is stable its characters are scored against the character
   * statistics of the input. The byte order with the higher score wins; equal
   * scores mean "not UTF-16".
   *
   * NOTE(review): count_chars() is defined elsewhere. If it returns a
   * "char => count" map, the in_array() below compares characters against
   * counts -- confirm that this is the intended heuristic.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16(string $str)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // computed lazily, once, and shared by both byte orders
    $inputChars = [];
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> UTF-16 -> UTF-8 must reproduce the first conversion
      $backToUtf16 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (\in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
2999
3000
  /**
   * Check if the string is UTF-32.
   *
   * Only strings that "UTF8::is_binary()" reports as binary are examined.
   * After BOM removal the string is interpreted as UTF-32LE and as UTF-32BE:
   * each interpretation is converted to UTF-8 and round-tripped, and when the
   * round trip is stable its characters are scored against the character
   * statistics of the input. The byte order with the higher score wins; equal
   * scores mean "not UTF-32".
   *
   * NOTE(review): count_chars() is defined elsewhere. If it returns a
   * "char => count" map, the in_array() below compares characters against
   * counts -- confirm that this is the intended heuristic.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32(string $str)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // computed lazily, once, and shared by both byte orders
    $inputChars = [];
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> UTF-32 -> UTF-8 must reproduce the first conversion
      $backToUtf32 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (\in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3065
3066
  /**
3067
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3068
   *
3069
   * @see    http://hsivonen.iki.fi/php-utf8/
3070
   *
3071
   * @param string|string[] $str    <p>The string to be checked.</p>
3072
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3073
   *
3074
   * @return bool
3075
   */
3076 92
  public static function is_utf8($str, bool $strict = false): bool
3077
  {
3078 92
    if (\is_array($str) === true) {
3079 1
      foreach ($str as $k => $v) {
3080 1
        if (false === self::is_utf8($v, $strict)) {
3081 1
          return false;
3082
        }
3083
      }
3084
3085
      return true;
3086
    }
3087
3088 92
    if ('' === $str) {
3089 11
      return true;
3090
    }
3091
3092 88
    if ($strict === true) {
3093 1
      if (self::is_utf16($str) !== false) {
3094 1
        return false;
3095
      }
3096
3097
      if (self::is_utf32($str) !== false) {
3098
        return false;
3099
      }
3100
    }
3101
3102 88
    if (self::pcre_utf8_support() !== true) {
3103
3104
      // If even just the first character can be matched, when the /u
3105
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3106
      // invalid, nothing at all will match, even if the string contains
3107
      // some valid sequences
3108
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3109
    }
3110
3111 88
    $mState = 0; // cached expected number of octets after the current octet
3112
    // until the beginning of the next UTF8 character sequence
3113 88
    $mUcs4 = 0; // cached Unicode character
3114 88
    $mBytes = 1; // cached expected number of octets in the current sequence
3115
3116 88
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3117
      self::checkForSupport();
3118
    }
3119
3120 88
    if (self::$ORD === null) {
3121
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3122
    }
3123
3124 88
    $len = self::strlen_in_byte((string)$str);
3125
    /** @noinspection ForeachInvariantsInspection */
3126 88
    for ($i = 0; $i < $len; $i++) {
3127 88
      $in = self::$ORD[$str[$i]];
3128 88
      if ($mState === 0) {
3129
        // When mState is zero we expect either a US-ASCII character or a
3130
        // multi-octet sequence.
3131 88
        if (0 === (0x80 & $in)) {
3132
          // US-ASCII, pass straight through.
3133 85
          $mBytes = 1;
3134 69
        } elseif (0xC0 === (0xE0 & $in)) {
3135
          // First octet of 2 octet sequence.
3136 62
          $mUcs4 = $in;
3137 62
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3138 62
          $mState = 1;
3139 62
          $mBytes = 2;
3140 46
        } elseif (0xE0 === (0xF0 & $in)) {
3141
          // First octet of 3 octet sequence.
3142 30
          $mUcs4 = $in;
3143 30
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3144 30
          $mState = 2;
3145 30
          $mBytes = 3;
3146 23
        } elseif (0xF0 === (0xF8 & $in)) {
3147
          // First octet of 4 octet sequence.
3148 13
          $mUcs4 = $in;
3149 13
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3150 13
          $mState = 3;
3151 13
          $mBytes = 4;
3152 11
        } elseif (0xF8 === (0xFC & $in)) {
3153
          /* First octet of 5 octet sequence.
3154
          *
3155
          * This is illegal because the encoded codepoint must be either
3156
          * (a) not the shortest form or
3157
          * (b) outside the Unicode range of 0-0x10FFFF.
3158
          * Rather than trying to resynchronize, we will carry on until the end
3159
          * of the sequence and let the later error handling code catch it.
3160
          */
3161 4
          $mUcs4 = $in;
3162 4
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3163 4
          $mState = 4;
3164 4
          $mBytes = 5;
3165 8
        } elseif (0xFC === (0xFE & $in)) {
3166
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3167 4
          $mUcs4 = $in;
3168 4
          $mUcs4 = ($mUcs4 & 1) << 30;
3169 4
          $mState = 5;
3170 4
          $mBytes = 6;
3171
        } else {
3172
          // Current octet is neither in the US-ASCII range nor a legal first
3173
          // octet of a multi-octet sequence.
3174 88
          return false;
3175
        }
3176
      } else {
3177
        // When mState is non-zero, we expect a continuation of the multi-octet
3178
        // sequence
3179 69
        if (0x80 === (0xC0 & $in)) {
3180
          // Legal continuation.
3181 63
          $shift = ($mState - 1) * 6;
3182 63
          $tmp = $in;
3183 63
          $tmp = ($tmp & 0x0000003F) << $shift;
3184 63
          $mUcs4 |= $tmp;
3185
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
3186
          // Unicode code point to be output.
3187 63
          if (0 === --$mState) {
3188
            // Check for illegal sequences and code points.
3189
            //
3190
            // From Unicode 3.1, non-shortest form is illegal
3191
            if (
3192 63
                (2 === $mBytes && $mUcs4 < 0x0080)
3193
                ||
3194 63
                (3 === $mBytes && $mUcs4 < 0x0800)
3195
                ||
3196 63
                (4 === $mBytes && $mUcs4 < 0x10000)
3197
                ||
3198 63
                (4 < $mBytes)
3199
                ||
3200
                // From Unicode 3.2, surrogate characters are illegal.
3201 63
                (($mUcs4 & 0xFFFFF800) === 0xD800)
3202
                ||
3203
                // Code points outside the Unicode range are illegal.
3204 63
                ($mUcs4 > 0x10FFFF)
3205
            ) {
3206 7
              return false;
3207
            }
3208
            // initialize UTF8 cache
3209 63
            $mState = 0;
3210 63
            $mUcs4 = 0;
3211 63
            $mBytes = 1;
3212
          }
3213
        } else {
3214
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
3215
          // Incomplete multi-octet sequence.
3216 28
          return false;
3217
        }
3218
      }
3219
    }
3220
3221 56
    return true;
3222
  }
3223
3224
  /**
   * Decodes a JSON string after running it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded. It is passed through
   *                        self::filter() before it reaches \json_decode().
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, forwarded unchanged to \json_decode().
   *                        </p>
   *
   * @throws \RuntimeException if self::$SUPPORT['json'] is false
   *
   * @return mixed
   *                The value returned by \json_decode() for the filtered input
   *                (<b>NULL</b> if the <i>json</i> cannot be decoded or is nested deeper than $depth).
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filtered = self::filter($json);

    // The support table is filled lazily; presumably checkForSupport() sets the 'json' entry - verify there.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filtered, $assoc, $depth, $options);
  }
3276
3277
  /**
   * Returns the JSON representation of a value after running it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. It is passed through
   *                       self::filter() before it reaches \json_encode().
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode options (<b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>, ...),
   *                       forwarded unchanged to \json_encode().
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @throws \RuntimeException if self::$SUPPORT['json'] is false
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filtered = self::filter($value);

    // The support table is filled lazily; presumably checkForSupport() sets the 'json' entry - verify there.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filtered, $options, $depth);
  }
3332
3333
  /**
   * Reports whether the JSON functions are usable in this PHP runtime.
   *
   * The check is simply whether the global function "json_decode" exists.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecoder = \function_exists('json_decode');

    return $hasJsonDecoder;
  }
3343
3344
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The lowercased first character followed by the remainder of the string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; self::substr() may hand back false, which is mapped to ''.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // The first character on its own, forced to string before lowercasing.
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($head, $encoding, $cleanUtf8) . ($tail === false ? '' : $tail);
  }
3368
3369
  /**
   * Alias for "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset handed on to lcfirst().</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean-up flag handed on to lcfirst().</p>
   *
   * @return string The result of UTF8::lcfirst().
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3384
3385
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split with self::str_to_words(); each non-empty piece is passed
   * through self::lcfirst() unless it is listed in $exceptions, and the pieces are
   * glued back together without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept exactly as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" check would also reject the valid input "0".
    if ('' === $str) {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip only truly empty pieces; the word "0" is falsy in PHP but must stay in the output.
      if ('' === (string)$word) {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3434
3435
  /**
   * Alias for "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Charset handed on to lcfirst().</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean-up flag handed on to lcfirst().</p>
   *
   * @return string The result of UTF8::lcfirst().
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3450
3451
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without a character list, every leading character from the Unicode categories
   * "Z" (separators) and "C" (control / other) is removed. With a list, the
   * characters are quoted with \preg_quote() and used as a character class.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped; INF or an empty value selects the default set.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP but is a perfectly valid character list, so it must not
    // fall through to the default set (native ltrim('007', '0') strips the zeros).
    $useDefaultSet = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultSet) {
      $pattern = "^[\pZ\pC]+";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3475
3476
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is first concatenated into a single string.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, or null when no code points were found.
   */
  public static function max($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);

    return \count($codepoints) === 0 ? null : self::chr(\max($codepoints));
  }
3498
3499
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int The largest entry of self::chr_size_list($str), or 0 when that list is empty.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3516
3517
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding
   * is switched to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3533
3534
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
   *
   * @return bool true only if MB_OVERLOAD_STRING is defined and its bit is set in 'mbstring.func_overload'.
   */
  private static function mbstring_overloaded(): bool
  {
    // Without the constant there is nothing to test against.
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3551
3552
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is first concatenated into a single string.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, or null when no code points were found.
   */
  public static function min($arg)
  {
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);

    return \count($codepoints) === 0 ? null : self::chr(\min($codepoints));
  }
3574
3575
  /**
   * Alias for "UTF8::normalize_encoding()": forwards both arguments unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Value handed on as the fallback.</p>
   *
   * @return mixed The result of UTF8::normalize_encoding().
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3591
3592
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input => $fallback; "UTF-8"/"UTF8" => "UTF-8"; per-request
   * static cache; exact match in the known-encodings list; otherwise the name is
   * upper-cased, stripped of everything except letters, digits and whitespace, and
   * looked up in the alias table below. Unknown names are returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // getData() can also return false; only a real array may be used as
    // the haystack, otherwise the alias table below decides.
    if (
        \is_array(self::$ENCODINGS)
        &&
        \in_array($encoding, self::$ENCODINGS, true)
    ) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the caller's original spelling so the next identical request is a plain array hit.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3708
3709
  /**
   * Standardize line endings to unix-like: "\r\n" and a lone "\r" both become "\n".
   *
   * @param string $str
   *
   * @return string
   */
  public static function normalize_line_ending(string $str): string
  {
    // strtr() tries the longest key first, so "\r\n" collapses to one "\n" before a lone "\r" is considered.
    $lineEndingMap = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return \strtr($str, $lineEndingMap);
  }
3720
3721
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement map is loaded once via self::getData('utf8_msword'); its keys
   * and values are kept in static caches and applied with \str_replace().
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string The input with every map key replaced by its mapped value.
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      // getData() can also return false; fall back to an empty map so that
      // array_keys()/array_values() never receive a non-array and the string
      // simply passes through unchanged.
      $replacementMap = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacementMap);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacementMap);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3749
3750
  /**
   * Normalize the whitespace.
   *
   * Every value of "self::$WHITESPACE_TABLE" is replaced by a plain space; the values of
   * "self::$BIDI_UNI_CODE_CONTROLS_TABLE" are removed unless they should be kept.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ('' === $str) {
      return '';
    }

    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // one cached search list per "$keepNonBreakingSpace" variant (0 or 1)
    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi controls are dropped before the whitespace is replaced
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3792
3793
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input.
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    // results are cached per (original input + encoding)
    static $CHAR_CACHE = [];

    // the cache key is built from the unconverted input
    $chr_orig = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $chr_orig . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCode = \IntlChar::ord($chr);

      // a falsy result (null or 0) falls through to the manual decoding below
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // manual decoding: look at (at most) the first four bytes, "unpack()" indexes them from 1
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4-byte sequence
      $codePoint = (int)((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3-byte sequence
      $codePoint = (int)((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2-byte sequence
      $codePoint = (int)((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
      // single byte (or nothing at all): the lead byte is the result
      $codePoint = $lead;
    }

    $CHAR_CACHE[$cacheKey] = $codePoint;

    return $codePoint;
  }
3861
3862
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method does not (re-)place variables in the current scope,
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool Will return <strong>false</strong> if php can't parse the string or if $result is empty.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);
    if ($parsed === false) {
      return false;
    }

    return !empty($result);
  }
3887
3888
  /**
   * Checks if the "/u" modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern always matches, so anything but "1" means that the modifier was rejected
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return $matched === 1;
  }
3900
3901
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted as a decimal number if it only contains digits, as a
   * hexadecimal number if it only contains hex digits, and as a character otherwise.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] <p>An empty array if one of the boundaries is falsy or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // resolve one boundary into a code point
    $toCodePoint = static function ($boundary): int {
      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    $chars = [];
    foreach (\range($start, $end) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
3957
3958
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities, so that the loop below can decode them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one pass: to UTF-8 -> decode entities -> url-decode -> fix broken UTF-8,
    // repeated until nothing changes anymore (if "$multi_decode" is set)
    do {
      $str_compare = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $urlDecoded = \rawurldecode($withoutEntities);

      $str = self::fix_simple_utf8($urlDecoded);
    } while ($multi_decode === true && $str_compare !== $str);

    return $str;
  }
4006
4007
  /**
   * Filter a list of strings.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Skip strings which are empty after "trim()".</p>
   * @param int   $removeShortValues [optional] <p>Skip strings with a length lower than or equal to this
   *                                 value, <strong>null</strong> disables the check.</p>
   *
   * @return array <p>The remaining strings, re-indexed from 0.</p>
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $string) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($string) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isEmpty = $removeEmptyValues === true
                 &&
                 \trim($string) === '';
      if ($isEmpty) {
        continue;
      }

      $kept[] = $string;
    }

    return $kept;
  }
4041
4042
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always executed with the "u" modifier, $options are appended to it.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used, "msr" is handled like "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // "msr" is mapped onto "ms"
    $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

    // fallback for an empty (falsy) delimiter
    $regexDelimiter = $delimiter ?: '/';

    return (string)\preg_replace(
        $regexDelimiter . $pattern . $regexDelimiter . $modifiers,
        $replacement,
        $str
    );
  }
4072
4073
  /**
   * Alias of "UTF8::remove_bom()", only kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4088
4089
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$BOM" (BOM => byte-length) is checked in turn against the beginning
   * of the (already shortened) string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // "CP850" is passed as encoding for the position / offset handling
      if (0 !== self::strpos($str, $bomString, 0, 'CP850')) {
        continue;
      }

      $withoutBom = self::substr($str, $bomByteLength, null, 'CP850');
      $str = $withoutBom === false ? '' : (string)$withoutBom;
    }

    return $str;
  }
4114
4115
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Consecutive repetitions of each search string are collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    // anything else than a string or an array is ignored
    if (\is_array($what) === false) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($what as $item) {
      // The capture group always holds exactly one (literal) occurrence of the item, so it is used as
      // replacement: passing "$item" itself would interpret sequences like "$0" or "\\" as replacement syntax.
      $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
    }

    return $str;
  }
4138
4139
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g.: "&lt;b&gt;&lt;i&gt;".
   *                              Default: '' (strip all tags)</p>
   *
   * @return string <p>The string without the html tags.</p>
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4153
4154
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // Modifiers: case-insensitive (i), dot matches newlines (s), ungreedy (U).
    // "\r\n" must be the first alternative, otherwise "\r" and "\n" would be replaced separately.
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4166
4167
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00", "%1F", ...) of the
   *                            characters.</p>
   * @param string $replacement [optional] <p>The replacement for every match.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), so both forms must be matched
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left, a removal can create a new match (e.g.: "%0%000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4200
4201
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // skip as many characters as the prefix has and keep the rest
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4224
4225
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // The lengths are measured in "$encoding", so the cut has to use the same encoding
    // (like "remove_left()" does), otherwise the offsets do not match for non UTF-8 input.
    $keepLength = self::strlen($str, $encoding) - self::strlen($substring, $encoding);

    return (string)self::substr($str, 0, $keepLength, $encoding);
  }
4247
4248
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4266
4267
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4285
4286
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement, an empty string removes the characters.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity", never an
      // arbitrary string. So invalid bytes are turned into U+FFFD first and then replaced, together with
      // the diamonds that were already part of the input, by the "str_replace()" below.
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      // restore the previous (global) setting
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4335
4336
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped, by default (INF or an empty value) all
   *                      characters of the unicode categories "Z" (separator) and "C" (other) are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string '0' is falsy in PHP, but it is a valid char list.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '[\pZ\pC]+$';
    } else {
      $chars = \preg_quote($chars, '/');
      // "\z" only matches at the very end, "$" would also match before a final newline
      $pattern = '[' . $chars . ']+\z';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4360
4361
  /**
   * Build a regex fragment that matches one of the characters of $s.
   *
   * The result is a character class ("[...]"), or a non-capturing group of alternatives if
   * "self::str_split()" returned an element which is longer than one character.
   *
   * @param string $s     <p>The characters to match, they are quoted for the "/" delimiter where needed.</p>
   * @param string $class [optional] <p>Raw content which is put at the beginning of the character class.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // content of the "[...]" part
    $charClass = $class;
    // elements which can't be part of the character class
    $alternatives = [];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a "-" at the beginning of the class is a literal
        $charClass = '-' . $charClass;
      } elseif (!isset($char[2])) {
        // at most two bytes: quote it
        $charClass .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // one character with three or more bytes: used as it is
        $charClass .= $char;
      } else {
        $alternatives[] = $char;
      }
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if ($alternatives === []) {
      $return = $charClass;
    } else {
      $return = '(?:' . \implode('|', \array_merge([$charClass], $alternatives)) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4409
4410
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of "self::$SUPPORT" is printed as "key - value" line inside of a "pre"-tag.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $output .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4425
4426
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string The HTML numbered entity, or an empty string for an empty input.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are used as they are, every other name is normalized first
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4455
4456
  /**
   * Replace every run of $tabLength spaces by one tab.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces which are replaced by one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4466
4467
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // one match per character, "s" lets the dot also match line breaks
      \preg_match_all('/./us', $str, $matches);
      $ret = $matches[0] ?? [];
      unset($matches);

    } else {

      // fallback: walk over the bytes and collect the well-formed 1-4 byte sequences

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $ret[] = $lead;

        } elseif (
            isset($str[$i + 1])
            &&
            ($lead & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: two bytes, taken only if the continuation byte is valid
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($lead & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: three bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($lead & "\xF8") === "\xF0"
        ) {

          // 11110xxx: four bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue the single characters together into chunks of "$length" characters
      $chunks = [];
      foreach (\array_chunk($ret, $length) as $chunkChars) {
        $chunks[] = \implode('', $chunkChars);
      }

      return $chunks;
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
4584
4585
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are dropped
    $str = (string)\preg_replace('/^[-_]+/', '', $str);

    // separators are removed and the character after them (if any) is upper-cased
    $upperAfterSeparator = function (array $match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    // digits are kept, the character after them (if any) is upper-cased with the whole match
    $upperAfterDigits = function (array $match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);
  }
4622
4623
  /**
   * Returns the string with the first letter of each word capitalized,
   * except for when the word is a name which shouldn't be capitalized.
   *
   * The whitespace is collapsed first, then the parts separated by " " and afterwards the parts
   * separated by "-" are processed.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);
    $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($spaceSeparated, '-');
  }
4640
4641
  /**
   * Capitalizes the parts of a personal name that are separated by $delimiter.
   *
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly
   * using lowercase ("marcus aurelius"). A part is left untouched when
   * - it is one of the known lowercase particles (e.g. "von", "de", "ibn"),
   * - it starts with one of the known prefixes (e.g. "mc", "d'"), or
   * - the delimiter is "-" and it starts with one of the known particles.
   * Every other part is passed through str_upper_first().
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator between the name parts.</p>
   * @param string $encoding  [optional] <p>Charset used for the prefix look-ups. Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // init
    $namesArray = \explode($delimiter, $names);

    // explode() reports an empty delimiter with false on older PHP versions
    if ($namesArray === false) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // For dash-separated parts the particles also count as protected beginnings.
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
    }

    foreach ($namesArray as &$name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $isProtected = false;
      foreach ($protectedBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $isProtected = true;

          // one hit is enough, no need to test the remaining beginnings
          break;
        }
      }

      if ($isProtected) {
        continue;
      }

      $name = self::str_upper_first($name);
    }
    // break the reference so later writes cannot modify the last array element
    unset($name);

    return \implode($delimiter, $namesArray);
  }
4729
4730
  /**
   * Checks whether $needle occurs anywhere in $haystack.
   *
   * The comparison is case-sensitive by default and can be made
   * case-insensitive by passing false for $caseSensitive.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    // position 0 is a valid hit, so only an explicit false means "not found"
    return $position !== false;
  }
4750
4751
  /**
   * Checks whether $haystack contains every one of the given $needles.
   *
   * The comparison is case-sensitive by default and can be made
   * case-insensitive by passing false for $caseSensitive. An empty haystack
   * or an empty needle list always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);

      // the first missing needle decides the result
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
4781
4782
  /**
   * Checks whether $haystack contains at least one of the given $needles.
   *
   * The comparison is case-sensitive by default and can be made
   * case-insensitive by passing false for $caseSensitive. An empty needle
   * list always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($needles as $candidate) {
      if (self::str_contains($haystack, $candidate, $caseSensitive, $encoding)) {
        return true;
      }
    }

    return false;
  }
4809
4810
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * This is str_delimit() with "-" as the delimiter: dashes are inserted
   * before uppercase characters (except for the first character of the
   * string) and in place of spaces as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
4824
4825
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * Uppercase detection uses the Unicode "Lu" property, so non-ASCII
   * uppercase letters are delimited as well. The delimiter is inserted
   * literally, even if it contains "\" or "$".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every uppercase letter inside a word with a temporary dash;
    // \p{Lu} (instead of [A-Z]) also catches letters such as "Ä" or "Σ"
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // escape "\" and "$" so that the delimiter is never read as a back-reference
    $literalDelimiter = \addcslashes($delimiter, '\\$');

    // collapse every run of dashes, underscores and whitespace into one delimiter
    return (string)\preg_replace('/[-_\s]+/u', $literalDelimiter, $str);
  }
4847
4848
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII,
   * UTF-8, "\mb_detect_encoding()" with a fixed detection order and finally
   * an "iconv()" round-trip against the list of known encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // evaluate each detector only once and map its result code
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() may report a failure with false; keep the property an array
      // so that the loop below never iterates over a non-array value
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = (string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // the string is valid in this encoding if the round-trip leaves it unchanged
      if ($roundTrip === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
4965
4966
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is case-sensitive and byte-based. An empty haystack or
   * an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    $needleLength = \strlen($needle);

    if ($needleLength === 0 || $haystack === '') {
      return false;
    }

    // compare the trailing bytes of the haystack with the needle
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
4982
4983
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($substrings as $candidate) {
      if (self::str_ends_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
5007
5008
  /**
   * Ensures that the string begins with $substring. If it doesn't, the
   * substring is prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring)
        ? $str
        : $substring . $str;
  }
5025
5026
  /**
   * Ensures that the string ends with $substring. If it doesn't, the
   * substring is appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring)
        ? $str
        : $str . $substring;
  }
5042
5043
  /**
   * Turns an identifier into a human readable string: every "_id" is
   * removed, the remaining underscores become spaces and the trimmed
   * result is passed through ucfirst().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // order matters: "_id" has to be removed before the plain "_" is replaced
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $humanized = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    return self::ucfirst(self::trim($humanized));
  }
5067
5068
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($needle === '' || $haystack === '') {
      return false;
    }

    // take as many trailing bytes from the haystack as the needle has
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5088
5089
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore falls through to "false"
    foreach ($substrings as $candidate) {
      if (self::str_iends_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
5113
5114
  /**
   * Case-insensitive search for the first occurrence of $needle in the
   * string, optionally starting at $offset. Delegates to stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::stripos($str, $needle, $offset, $encoding);
  }
5136
5137
  /**
   * Case-insensitive search for the last occurrence of $needle in the
   * string, optionally starting at $offset. Delegates to strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strripos($str, $needle, $offset, $encoding);
  }
5160
5161
  /**
   * Case-sensitive search for the first occurrence of $needle in the
   * string, optionally starting at $offset. Delegates to strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strpos($str, $needle, $offset, $encoding);
  }
5183
5184
  /**
   * Case-sensitive search for the last occurrence of $needle in the
   * string, optionally starting at $offset. Delegates to strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    return self::strrpos($str, $needle, $offset, $encoding);
  }
5207
5208
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The input is returned unchanged when $index is greater than the length
   * of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    if ($index > $len) {
      return $str;
    }

    // substr() may answer with false; cast so that both halves are real strings
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5231
5232
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search strings are matched literally (they are quoted before they
   * are turned into patterns) and the replacements are inserted literally:
   * sequences such as "$0" or "\1" are not treated as back-references.
   * An empty search string never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value (or array of values) to search for.
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (or array of values).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive, unicode-aware pattern per search value
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // "start of string preceded by a character" can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$" so that preg_replace() inserts the replacement literally
    $escape = static function ($value): string {
      return \addcslashes((string)$value, '\\$');
    };
    $replacements = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $replaced = 0;
    $subject = \preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
5275
5276
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($needle === '' || $haystack === '') {
      return false;
    }

    // only a hit at the very first position counts as "starts with"
    return self::stripos($haystack, $needle) === 0;
  }
5296
5297
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_istarts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
5325
5326
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator does not occur in the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5357
5358
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator does not occur in the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5389
5390
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator does not occur in the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5416
5417
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * An empty string is returned when $str or $separator is empty or when
   * the separator does not occur in the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // search with the same encoding that is used for the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5443
5444
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * The lookup is delegated to stristr(). An empty string is returned when
   * $str or $needle is empty or when stristr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5476
5477
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * The lookup is delegated to strrichr(). An empty string is returned when
   * $str or $needle is empty or when strrichr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
  }
5504
5505
  /**
   * Returns the last $n characters of the string.
   *
   * An empty string is returned when $n is zero or negative, or when
   * substr() reports false.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        [optional] <p>Number of characters to retrieve from the end. Default: 1</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // a negative start counts from the end of the string
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5524
5525
  /**
   * Limit the number of characters in a string.
   *
   * Strings that already fit into $length are returned unchanged. Longer
   * strings are cut so that the result, including $strAddOn, is $length
   * characters long. If $strAddOn alone is at least $length characters
   * long, only $strAddOn is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // measure the add-on with the same encoding as the input string
    $addOnLength = (int)self::strlen($strAddOn, $encoding);

    // never pass a negative length: substr() would then cut from the end
    // of the string instead of limiting it
    $keep = \max(0, $length - $addOnLength);

    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5551
5552
  /**
5553
   * Limit the number of characters in a string, but also after the next word.
5554
   *
5555
   * @param string $str      <p>The input string.</p>
5556
   * @param int    $length   [optional] <p>Default: 100</p>
5557
   * @param string $strAddOn [optional] <p>Default: …</p>
5558
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5559
   *
5560
   * @return string
5561
   */
5562 5
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    // Nothing to return for an empty input or a non-positive limit.
    if ('' === $str || $length <= 0) {
      return '';
    }

    // Short enough already: return the input unchanged (no add-on).
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The character at the limit is a space: drop it and append the add-on.
    $charAtLimit = self::substr($str, $length - 1, 1, $encoding);
    if ($charAtLimit === ' ') {
      return (string)self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // Otherwise cut at $length and discard everything after the last space.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // No space inside the limit: fall back to a plain character cut.
    return (string)self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
5593
5594
  /**
5595
   * Returns the longest common prefix between the string and $otherStr.
5596
   *
5597
   * @param string $str      <p>The input string.</p>
5598
   * @param string $otherStr <p>Second string for comparison.</p>
5599
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
5600
   *
5601
   * @return string
5602
   */
5603 10
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The prefix can never be longer than the shorter of the two strings.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $position = 0;
    while ($position < $limit) {
      $current = self::substr($str, $position, 1, $encoding);

      // Stop at the first position where the two strings differ.
      if ($current != self::substr($otherStr, $position, 1, $encoding)) {
        break;
      }

      $prefix .= $current;
      ++$position;
    }

    return $prefix;
  }
5620
5621
  /**
5622
   * Returns the longest common substring between the string and $otherStr.
5623
   * In the case of ties, it returns that which occurs first.
5624
   *
5625
   * @param string $str
5626
   * @param string $otherStr <p>Second string for comparison.</p>
5627
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
5628
   *
5629
   * @return string The longest common substring of $str and $otherStr.
5630
   */
5631 11
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength == 0 || $otherLength == 0) {
      return '';
    }

    // Extract every character of $otherStr exactly once instead of once per
    // cell of the table (1-based keys to line up with the table columns).
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the longest common run found so far
    $end = 0; // 1-based position in $str just after that run

    // Each cell only depends on the diagonal neighbour in the previous row,
    // so a single previous row is enough.
    $previousRow = \array_fill(0, $otherLength + 1, 0);

    for ($i = 1; $i <= $strLength; $i++) {
      // This character only depends on $i, so fetch it outside the inner loop.
      $strChar = self::substr($str, $i - 1, 1, $encoding);
      $currentRow = [0];

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar == $otherChars[$j]) {
          $run = $previousRow[$j - 1] + 1;
          $currentRow[$j] = $run;

          // Strict ">" keeps the first occurrence in case of ties.
          if ($run > $len) {
            $len = $run;
            $end = $i;
          }
        } else {
          $currentRow[$j] = 0;
        }
      }

      $previousRow = $currentRow;
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
5672
5673
  /**
5674
   * Returns the longest common suffix between the string and $otherStr.
5675
   *
5676
   * @param string $str
5677
   * @param string $otherStr <p>Second string for comparison.</p>
5678
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
5679
   *
5680
   * @return string
5681
   */
5682 10
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The suffix can never be longer than the shorter of the two strings.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $fromEnd = 1;
    while ($fromEnd <= $limit) {
      // A negative offset asks substr() to count from the end of the string.
      $current = self::substr($str, -$fromEnd, 1, $encoding);

      // Stop at the first position (from the end) where the strings differ.
      if ($current != self::substr($otherStr, -$fromEnd, 1, $encoding)) {
        break;
      }

      // Walking backwards, so each new character is prepended.
      $suffix = $current . $suffix;
      ++$fromEnd;
    }

    return $suffix;
  }
5699
5700
  /**
5701
   * Returns true if $str matches the supplied pattern, false otherwise.
5702
   *
5703
   * @param string $str     <p>The input string.</p>
5704
   * @param string $pattern <p>Regex pattern to match against.</p>
5705
   *
5706
   * @return bool Whether or not $str matches the pattern.
5707
   */
5708 126
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // The pattern is wrapped in "/" delimiters and evaluated in UTF-8 mode.
    $regex = '/' . $pattern . '/u';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only an actual match counts as success.
    return \preg_match($regex, $str) === 1;
  }
5716
5717
  /**
5718
   * Returns whether or not a character exists at an index. Offsets may be
5719
   * negative to count from the last character in the string. Implements
5720
   * part of the ArrayAccess interface.
5721
   *
5722
   * @param string $str      <p>The input string.</p>
5723
   * @param int    $offset   <p>The index to check.</p>
5724
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
5725
   *
5726
   *
5727
   * @return bool Whether or not the index exists.
5728
   */
5729 6
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);

    // Negative offsets count backwards from the end, so the furthest valid
    // one has the same magnitude as the character count.
    if ($offset < 0) {
      return \abs($offset) <= $charCount;
    }

    // Non-negative offsets are zero-based indexes.
    return $offset < $charCount;
  }
5740
5741
  /**
5742
   * Returns the character at the given index. Offsets may be negative to
5743
   * count from the last character in the string. Implements part of the
5744
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
5745
   * does not exist.
5746
   *
5747
   * @param string $str      <p>The input string.</p>
5748
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
5749
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5750
   *
5751
   * @return string The character at the specified index.
5752
   *
5753
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
5754
   */
5755 2
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Count characters in the same encoding that char_at() receives below,
    // so the bounds check and the lookup agree on what a "character" is.
    $length = self::strlen($str, $encoding);

    // Non-negative indexes are zero-based; negative ones count from the end.
    $outOfBounds = $index >= 0
        ? $length <= $index
        : $length < \abs($index);

    if ($outOfBounds) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
5770
5771
  /**
5772
   * Pad a UTF-8 string to given length with another string.
5773
   *
5774
   * @param string $str        <p>The input string.</p>
5775
   * @param int    $pad_length <p>The length of return string.</p>
5776
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
5777
   * @param int    $pad_type   [optional] <p>
5778
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
5779
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
5780
   *                           </p>
5781
   * @param string $encoding   [optional] <p>Default: UTF-8</p>
5782
   *
5783
   * @return string Returns the padded string.
5784
   */
5785 40
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    // An empty input is returned as-is; it is never padded.
    if ('' === $str) {
      return '';
    }

    // Accept the textual aliases 'left' / 'right' / 'both' next to the
    // STR_PAD_* integer constants.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    // Target length is not positive or already exceeded: nothing to pad.
    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string, $encoding);

    // An empty pad string cannot fill anything, and dividing by its length
    // below would be a division by zero.
    if ($ps_length < 1) {
      return $str;
    }

    // Number of characters that have to be added in total.
    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        // Repeat the pad string often enough, then trim to the exact size.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // With an odd $diff the extra character goes to the right side.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
5842
5843
  /**
5844
   * Returns a new string of a given length such that both sides of the
5845
   * string are padded. Alias for pad() with a $padType of 'both'.
5846
   *
5847
   * @param string $str
5848
   * @param int    $length   <p>Desired string length after padding.</p>
5849
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
5850
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5851
   *
5852
   * @return string String with padding applied.
5853
   */
5854 11
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Total number of characters missing to reach $length.
    $missing = $length - self::strlen($str, $encoding);
    $half = $missing / 2;

    // floor() for the left side, ceil() for the right: an odd remainder puts
    // the extra pad character on the right.
    $left = (int)\floor($half);
    $right = (int)\ceil($half);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
5860
5861
  /**
5862
   * Returns a new string of a given length such that the beginning of the
5863
   * string is padded. Alias for pad() with a $padType of 'left'.
5864
   *
5865
   * @param string $str
5866
   * @param int    $length   <p>Desired string length after padding.</p>
5867
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
5868
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5869
   *
5870
   * @return string String with left padding.
5871
   */
5872 7
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure $str in the requested encoding (as str_pad_both() does) so the
    // amount of padding matches the encoding handed to apply_padding().
    $missing = $length - self::strlen($str, $encoding);

    // All missing characters go to the left side, none to the right.
    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
5876
5877
  /**
5878
   * Returns a new string of a given length such that the end of the string
5879
   * is padded. Alias for pad() with a $padType of 'right'.
5880
   *
5881
   * @param string $str
5882
   * @param int    $length   <p>Desired string length after padding.</p>
5883
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
5884
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5885
   *
5886
   * @return string String with right padding.
5887
   */
5888 7
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure $str in the requested encoding (as str_pad_both() does) so the
    // amount of padding matches the encoding handed to apply_padding().
    $missing = $length - self::strlen($str, $encoding);

    // All missing characters go to the right side, none to the left.
    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
5892
5893
  /**
5894
   * Repeat a string.
5895
   *
5896
   * @param string $str        <p>
5897
   *                           The string to be repeated.
5898
   *                           </p>
5899
   * @param int    $multiplier <p>
5900
   *                           Number of times the input string should be
5901
   *                           repeated.
5902
   *                           </p>
5903
   *                           <p>
5904
   *                           multiplier has to be greater than or equal to 0.
5905
   *                           If the multiplier is set to 0, the function
5906
   *                           will return an empty string.
5907
   *                           </p>
5908
   *
5909
   * @return string The repeated string.
5910
   */
5911 8
  public static function str_repeat(string $str, int $multiplier): string
  {
    // The input is passed through self::filter() first; the result is then
    // repeated with the native str_repeat().
    return \str_repeat(self::filter($str), $multiplier);
  }
5917
5918
  /**
5919
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
5920
   *
5921
   * Replace all occurrences of the search string with the replacement string
5922
   *
5923
   * @link http://php.net/manual/en/function.str-replace.php
5924
   *
5925
   * @param mixed $search  <p>
5926
   *                       The value being searched for, otherwise known as the needle.
5927
   *                       An array may be used to designate multiple needles.
5928
   *                       </p>
5929
   * @param mixed $replace <p>
5930
   *                       The replacement value that replaces found search
5931
   *                       values. An array may be used to designate multiple replacements.
5932
   *                       </p>
5933
   * @param mixed $subject <p>
5934
   *                       The string or array being searched and replaced on,
5935
   *                       otherwise known as the haystack.
5936
   *                       </p>
5937
   *                       <p>
5938
   *                       If subject is an array, then the search and
5939
   *                       replace is performed with every entry of
5940
   *                       subject, and the return value is an array as
5941
   *                       well.
5942
   *                       </p>
5943
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
5944
   *
5945
   * @return mixed This function returns a string or an array with the replaced values.
5946
   */
5947 92
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Plain pass-through to the native function; $count is filled in by
    // reference with the number of replacements performed.
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
5951
5952
  /**
5953
   * Replaces all occurrences of $search from the beginning of string with $replacement.
5954
   *
5955
   * @param string $str         <p>The input string.</p>
5956
   * @param string $search      <p>The string to search for.</p>
5957
   * @param string $replacement <p>The replacement.</p>
5958
   *
5959
   * @return string String after the replacements.
5960
   */
5961 16
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // Match the search term literally (special characters quoted), anchored
    // at the start of the string.
    $pattern = '^' . \preg_quote($search, '/');

    // Escape "\" and "$" so sequences such as "\1" or "$1" in the replacement
    // are inserted verbatim instead of being read as back-references.
    // NOTE(review): assumes regex_replace() hands the replacement to
    // preg_replace() -- confirm against self::regex_replace().
    $literalReplacement = \strtr($replacement, ['\\' => '\\\\', '$' => '\\$']);

    return self::regex_replace($str, $pattern, $literalReplacement);
  }
5969
5970
  /**
5971
   * Replaces all occurrences of $search from the ending of string with $replacement.
5972
   *
5973
   * @param string $str         <p>The input string.</p>
5974
   * @param string $search      <p>The string to search for.</p>
5975
   * @param string $replacement <p>The replacement.</p>
5976
   *
5977
   * @return string String after the replacements.
5978
   */
5979 16
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // Match the search term literally (special characters quoted), anchored
    // at the end of the string.
    $pattern = \preg_quote($search, '/') . '$';

    // Escape "\" and "$" so sequences such as "\1" or "$1" in the replacement
    // are inserted verbatim instead of being read as back-references.
    // NOTE(review): assumes regex_replace() hands the replacement to
    // preg_replace() -- confirm against self::regex_replace().
    $literalReplacement = \strtr($replacement, ['\\' => '\\\\', '$' => '\\$']);

    return self::regex_replace($str, $pattern, $literalReplacement);
  }
5987
5988
  /**
5989
   * Replace the first "$search"-term with the "$replace"-term.
5990
   *
5991
   * @param string $search
5992
   * @param string $replace
5993
   * @param string $subject
5994
   *
5995
   * @return string
5996
   */
5997 1
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    // Position of the first occurrence, or false when there is none.
    $position = self::strpos($subject, $search);

    // Search term not found: hand the subject back untouched.
    if ($position === false) {
      return $subject;
    }

    // Swap exactly the matched stretch for the replacement.
    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6007
6008
  /**
6009
   * Replace the last "$search"-term with the "$replace"-term.
6010
   *
6011
   * @param string $search
6012
   * @param string $replace
6013
   * @param string $subject
6014
   *
6015
   * @return string
6016
   */
6017 1
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    // Position of the last occurrence, or false when there is none.
    $position = self::strrpos($subject, $search);

    // Search term not found: hand the subject back untouched.
    if ($position === false) {
      return $subject;
    }

    // Swap exactly the matched stretch for the replacement.
    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6027
6028
  /**
6029
   * Shuffles all the characters in the string.
6030
   *
6031
   * PS: uses random algorithm which is weak for cryptography purposes
6032
   *
6033
   * @param string $str <p>The input string</p>
6034
   *
6035
   * @return string The shuffled string.
6036
   */
6037 4
  public static function str_shuffle(string $str): string
  {
    // One index per character, put into random order.
    $positions = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($positions);

    // Pick the characters in the shuffled order and glue them back together.
    $characters = \array_map(
        function ($position) use ($str) {
          return self::substr($str, $position, 1);
        },
        $positions
    );

    return \implode('', $characters);
  }
6050
6051
  /**
6052
   * Returns the substring beginning at $start, and up to, but not including
6053
   * the index specified by $end. If $end is omitted, the function extracts
6054
   * the remaining string. If $end is negative, it is computed from the end
6055
   * of the string.
6056
   *
6057
   * @param string $str
6058
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
6059
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
6060
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6061
   *
6062
   * @return string|false
6063
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
6064
   *                     characters long, <b>FALSE</b> will be returned.
6065
   */
6066 18
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // Character count in the encoding that is also handed to substr().
    $strLength = self::strlen($str, $encoding);

    // No end given: take everything from $start onwards.
    if ($end === null) {
      return self::substr($str, $start, $strLength, $encoding);
    }

    // Translate negative indexes (counted from the end) into absolute
    // positions, so the slice length is computed from comparable values even
    // when $start is negative.
    $from = $start < 0 ? \max(0, $strLength + $start) : $start;
    $to = $end < 0 ? $strLength + $end : $end;

    // The end lies at or before the start: the slice is empty.
    if ($to <= $from) {
      return '';
    }

    return self::substr($str, $start, $to - $from, $encoding);
  }
6080
6081
  /**
6082
   * Convert a string to e.g.: "snake_case"
6083
   *
6084
   * @param string $str
6085
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6086
   *
6087
   * @return string String in snake_case.
6088
   */
6089 20
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    // Put "_" around every digit and in front of every (lower-cased) "A"-"Z".
    // The character class deliberately has no "|": inside [...] it would be a
    // literal pipe character, not an alternation.
    $str = (string)\preg_replace_callback(
        '/([\dA-Z])/u',
        function ($matches) use ($encoding) {
          $match = $matches[1];
          $matchInt = (int)$match;

          // The match survives an int round trip, i.e. it is a digit.
          if ((string)$matchInt == $match) {
            return '_' . $match . '_';
          }

          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    // The patterns are applied one after another. A separate whitespace-trim
    // pattern is not needed here: the first step already turns every
    // whitespace run into "_", and the edges are trimmed below.
    $str = (string)\preg_replace(
        [
            '/\s+/', // convert whitespace runs to "_"
            '/_+/',  // collapse repeated "_"
        ],
        [
            '_',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6128
6129
  /**
6130
   * Sort all characters according to code points.
6131
   *
6132
   * @param string $str    <p>A UTF-8 string.</p>
6133
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
6134
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
6135
   *
6136
   * @return string String of sorted characters.
6137
   */
6138 1
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    // Flipping twice collapses duplicate values, because array keys are unique.
    if ($unique) {
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    // Order by value; both functions keep the key association.
    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    // Turn the ordered code points back into a string.
    return self::string($codePoints);
  }
6154
6155
  /**
6156
   * Split a string into an array.
6157
   *
6158
   * @param string|string[] $str
6159
   * @param int             $len
6160
   *
6161
   * @return string[]
6162
   */
6163 22
  public static function str_split($str, int $len = 1): array
  {
    // A non-positive chunk length yields nothing.
    if ($len <= 0) {
      return [];
    }

    // Arrays are split element by element, keeping their keys.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    // Collect every match of the grapheme-cluster expression, in order.
    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue every $len consecutive clusters into one output element; the
    // last element may be shorter.
    $chunks = [];
    foreach (\array_chunk($clusters, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
6203
6204
  /**
6205
   * Splits the string on the provided pattern (matched literally), returning an
6206
   * array of strings. An optional integer $limit will truncate the
6207
   * results.
6208
   *
6209
   * @param string $str
6210
   * @param string $pattern <p>The pattern with which to split the string; it is quoted and therefore matched literally.</p>
6211
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
6212
   *
6213
   * @return string[] An array of strings.
6214
   */
6215 16
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    // A limit of zero asks for no results at all.
    if ($limit === 0) {
      return [];
    }

    // With an empty pattern the input is returned as the only element.
    if ($pattern === '') {
      return [$str];
    }

    // With a limit, preg_split() puts the unsplit remainder into the last
    // slot. Ask for one extra slot so the remainder can be dropped afterwards.
    // Any negative limit means "no limit".
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    // The pattern is quoted, i.e. it is matched literally.
    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);

    if ($parts === false) {
      return [];
    }

    // All slots used: the last one holds the unsplit remainder, so drop it.
    if ($splitLimit > 0 && \count($parts) === $splitLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6247
6248
  /**
6249
   * Check if the string starts with the given substring.
6250
   *
6251
   * @param string $haystack <p>The string to search in.</p>
6252
   * @param string $needle   <p>The substring to search for.</p>
6253
   *
6254
   * @return bool
6255
   */
6256 39
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // An empty haystack or an empty needle never counts as a match.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // Byte-wise comparison of the first strlen($needle) bytes of both strings.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6268
6269
  /**
6270
   * Returns true if the string begins with any of $substrings, false otherwise.
6271
   *
6272
   * - case-sensitive
6273
   *
6274
   * @param string $str        <p>The input string.</p>
6275
   * @param array  $substrings <p>Substrings to look for.</p>
6276
   *
6277
   * @return bool Whether or not $str starts with $substring.
6278
   */
6279 8
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing can match an empty string, and an empty list has no candidates.
    if ('' === $str || [] === $substrings) {
      return false;
    }

    // The first candidate that is a prefix of $str decides the result.
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6297
6298
  /**
6299
   * Gets the substring after the first occurrence of a separator.
6300
   *
6301
   * @param string $str       <p>The input string.</p>
6302
   * @param string $separator <p>The string separator.</p>
6303
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6304
   *
6305
   * @return string
6306
   */
6307 1
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Index of the first separator, or false when the string has none.
    $position = self::str_index_first($str, $separator);
    if (false === $position) {
      return '';
    }

    // Skip the separator itself and take everything up to the end.
    $afterSeparator = $position + self::strlen($separator, $encoding);

    return (string)self::substr($str, $afterSeparator, null, $encoding);
  }
6329
6330
  /**
6331
   * Gets the substring after the last occurrence of a separator.
6332
   *
6333
   * @param string $str       <p>The input string.</p>
6334
   * @param string $separator <p>The string separator.</p>
6335
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6336
   *
6337
   * @return string
6338
   */
6339 1
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Index of the last separator, or false when the string has none.
    $position = self::str_index_last($str, $separator);
    if (false === $position) {
      return '';
    }

    // Skip the separator itself and take everything up to the end.
    $afterSeparator = $position + self::strlen($separator, $encoding);

    return (string)self::substr($str, $afterSeparator, null, $encoding);
  }
6361
6362
  /**
6363
   * Gets the substring before the first occurrence of a separator.
6364
   *
6365
   * @param string $str       <p>The input string.</p>
6366
   * @param string $separator <p>The string separator.</p>
6367
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6368
   *
6369
   * @return string
6370
   */
6371 1
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Index of the first separator, or false when the string has none.
    $position = self::str_index_first($str, $separator);
    if (false === $position) {
      return '';
    }

    // Everything in front of the separator: $position characters from the start.
    return (string)self::substr($str, 0, $position, $encoding);
  }
6393
6394
  /**
6395
   * Gets the substring before the last occurrence of a separator.
6396
   *
6397
   * @param string $str       <p>The input string.</p>
6398
   * @param string $separator <p>The string separator.</p>
6399
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6400
   *
6401
   * @return string
6402
   */
6403 1
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Index of the last separator, or false when the string has none.
    $position = self::str_index_last($str, $separator);
    if (false === $position) {
      return '';
    }

    // Everything in front of the separator: $position characters from the start.
    return (string)self::substr($str, 0, $position, $encoding);
  }
6425
6426
  /**
6427
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
6428
   *
6429
   * @param string $str          <p>The input string.</p>
6430
   * @param string $needle       <p>The string to look for.</p>
6431
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6432
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
6433
   *
6434
   * @return string
6435
   */
6436 2
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // An empty haystack or an empty needle can never produce a match.
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // strstr() reports "needle not found" as false; this method reports it as ''.
    if ($found === false) {
      return '';
    }

    return $found;
  }
6458
6459
  /**
6460
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
6461
   *
6462
   * @param string $str          <p>The input string.</p>
6463
   * @param string $needle       <p>The string to look for.</p>
6464
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6465
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
6466
   *
6467
   * @return string
6468
   */
6469 2
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // An empty haystack or an empty needle can never produce a match.
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // A false result is mapped to '' so that callers always receive a string.
    if ($found === false) {
      return '';
    }

    return $found;
  }
6486
6487
  /**
6488
   * Surrounds $str with the given substring.
6489
   *
6490
   * @param string $str
6491
   * @param string $substring <p>The substring to add to both sides.</p>
6492
   *
6493
   * @return string String with the substring both prepended and appended.
6494
   */
6495 5
  public static function str_surround(string $str, string $substring): string
  {
    // Prefix and suffix are the very same substring.
    return $substring . $str . $substring;
  }
6499
6500
  /**
6501
   * Returns a trimmed string with the first letter of each word capitalized.
6502
   * Also accepts an array, $ignore, allowing you to list words not to be
6503
   * capitalized.
6504
   *
6505
   * @param string              $str
6506
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
6507
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
6508
   *
6509
   * @return string The titleized string.
6510
   */
6511 5
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // Every run of non-whitespace characters is handled as one "word".
    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          // Words on the ignore list are returned exactly as written.
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Lower-case the word, then upper-case its first character; the
          // caller's encoding is used for BOTH steps.
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6529
6530
  /**
6531
   * Returns a trimmed string in proper title case.
6532
   *
6533
   * Also accepts an array, $ignore, allowing you to list words not to be
6534
   * capitalized.
6535
   *
6536
   * Adapted from John Gruber's script.
6537
   *
6538
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
6539
   *
6540
   * @param string $str
6541
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
6542
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6543
   *
6544
   * @return string The titleized string.
6545
   */
6546 35
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // The built-in "small words" are regex fragments; the caller-supplied
    // $ignore entries are appended to the same alternation.
    // NOTE(review): $ignore entries are interpolated into the patterns below
    // without preg_quote(), so entries containing regex meta characters are
    // read as regex syntax — confirm whether that is intended.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // Input without any lower-case letter (e.g. ALL CAPS) is lower-cased up
    // front, using the caller's encoding like the later steps do.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= static::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in the pattern below tests for "…" although
    // its inline comment talks about a hyphen — confirm against the upstream script.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6674
6675
  /**
6676
   * Get a binary representation of a specific string.
6677
   *
6678
   * @param string $str <p>The input string.</p>
6679
   *
6680
   * @return string
6681
   */
6682 1
  public static function str_to_binary(string $str): string
  {
    // 'H*' renders the whole input as one hex string, stored under index 1.
    $unpacked = \unpack('H*', $str);
    $hex = $unpacked[1];

    // Re-express the hexadecimal digits in base 2.
    // NOTE(review): base_convert() is documented to lose precision on large
    // numbers, so long inputs may not convert exactly — confirm expected range.
    return \base_convert($hex, 16, 2);
  }
6688
6689
  /**
6690
   * @param string   $str
6691
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
6692
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
6693
   *
6694
   * @return string[]
6695
   */
6696 17
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // Split on exactly ONE line break per match: CRLF first, then a lone CR or LF.
    // A quantified class such as [\r\n]{1,2} would also consume "\n\n" or "\r\r"
    // as a single break and silently drop the empty line in between, while
    // "\r\n\r\n" would keep it.
    $lines = \preg_split("/\r\n|\r|\n/u", $str);

    // preg_split() returns false on a PCRE error (e.g. malformed UTF-8 with /u).
    if ($lines === false) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // No filtering requested: hand back the raw split result.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
6724
6725
  /**
6726
   * Convert a string into an array of words.
6727
   *
6728
   * @param string   $str
6729
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
6730
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
6731
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
6732
   *
6733
   * @return string[]
6734
   */
6735 10
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ($str === '') {
      return $removeEmptyValues === true ? [] : [''];
    }

    // Character class describing "word" characters: letters plus $charList.
    $charList = self::rxClass($charList, '\pL');

    // With PREG_SPLIT_DELIM_CAPTURE the words (the captured delimiters) are
    // kept in the result, alternating with the text found between them.
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($parts === false) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // No filtering requested: hand back the raw split result.
    $filteringRequested = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$filteringRequested) {
      return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    // Make sure every remaining entry really is a string.
    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
6769
6770
  /**
6771
   * alias for "UTF8::to_ascii()"
6772
   *
6773
   * @see UTF8::to_ascii()
6774
   *
6775
   * @param string $str
6776
   * @param string $unknown
6777
   * @param bool   $strict
6778
   *
6779
   * @return string
6780
   */
6781 7
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Pure alias: all arguments are handed to to_ascii() unchanged.
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6785
6786
  /**
6787
   * Truncates the string to a given length. If $substring is provided, and
6788
   * truncating occurs, the string is further truncated so that the substring
6789
   * may be appended without exceeding the desired length.
6790
   *
6791
   * @param string $str
6792
   * @param int    $length    <p>Desired length of the truncated string.</p>
6793
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
6794
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6795
   *
6796
   * @return string String after truncating.
6797
   */
6798 22
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // $str carries no type declaration, so it is normalized to a string first.
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // Already short enough: return the input untouched (no substring appended).
    if (self::strlen($input, $encoding) <= $length) {
      return $input;
    }

    // Reserve room for the substring that gets appended after the cut.
    $keep = $length - self::strlen($substring, $encoding);

    // The cast spells out what string concatenation does with a false result.
    return (string)self::substr($input, 0, $keep, $encoding) . $substring;
  }
6819
6820
  /**
6821
   * Truncates the string to a given length, while ensuring that it does not
6822
   * split words. If $substring is provided, and truncating occurs, the
6823
   * string is further truncated so that the substring may be appended without
6824
   * exceeding the desired length.
6825
   *
6826
   * @param string $str
6827
   * @param int    $length    <p>Desired length of the truncated string.</p>
6828
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
6829
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6830
   *
6831
   * @return string String after truncating.
6832
   */
6833 23
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // Already short enough: return the input untouched (no substring appended).
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Reserve room for the substring that gets appended after the cut.
    $length -= self::strlen($substring, $encoding);

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // Look for a space starting one character before the cut. Only a space
    // sitting exactly AT the cut position means no word was split.
    // Strict comparison keeps "not found" (false) apart from position 0.
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace !== $length) {
      // The cut landed inside a word: fall back to the last space inside
      // the truncated part.
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      // When no space exists in the truncated part but one follows later,
      // (int)false === 0 drops the partial first word entirely.
      if ($lastPos !== false || $strPosSpace !== false) {
        $truncated = self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    // substr() may report false; the cast makes the concatenation explicit.
    return (string)$truncated . $substring;
  }
6863
6864
  /**
6865
   * Returns a lowercase and trimmed string separated by underscores.
6866
   * Underscores are inserted before uppercase characters (with the exception
6867
   * of the first character of the string), and in place of spaces as well as
6868
   * dashes.
6869
   *
6870
   * @param string $str
6871
   *
6872
   * @return string The underscored string.
6873
   */
6874 16
  public static function str_underscored(string $str): string
  {
    // Delegates to str_delimit() with "_" as the delimiter.
    $underscored = self::str_delimit($str, '_');

    return $underscored;
  }
6878
6879
  /**
6880
   * Returns an UpperCamelCase version of the supplied string. It trims
6881
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
6882
   * and underscores, and removes spaces, dashes, underscores.
6883
   *
6884
   * @param string $str      <p>The input string.</p>
6885
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6886
   *
6887
   * @return string String in UpperCamelCase.
6888
   */
6889 13
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // Step 1: camelize the input.
    $camelized = self::str_camelize($str, $encoding);

    // Step 2: additionally upper-case the very first character.
    return self::str_upper_first($camelized, $encoding);
  }
6893
6894
  /**
6895
   * alias for "UTF8::ucfirst()"
6896
   *
6897
   * @see UTF8::ucfirst()
6898
   *
6899
   * @param string $str
6900
   * @param string $encoding
6901
   * @param bool   $cleanUtf8
6902
   *
6903
   * @return string
6904
   */
6905 58
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are handed to ucfirst() unchanged.
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
6909
6910
  /**
6911
   * Counts number of words in the UTF-8 string.
6912
   *
6913
   * @param string $str      <p>The input string.</p>
6914
   * @param int    $format   [optional] <p>
6915
   *                         <strong>0</strong> => return a number of words (default)<br>
6916
   *                         <strong>1</strong> => return an array of words<br>
6917
   *                         <strong>2</strong> => return an array of words with word-offset as key
6918
   *                         </p>
6919
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
6920
   *
6921
   * @return string[]|int The number of words in the string
6922
   */
6923 1
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // Called without filtering, str_to_words() returns its raw split result:
    // words sit at the odd indexes, the text between them at the even ones.
    $parts = self::str_to_words($str, $charlist);
    $total = \count($parts);

    // Format 1: plain list of words.
    if ($format === 1) {
      $words = [];
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    // Format 2: words keyed by their character offset in the input.
    if ($format === 2) {
      $words = [];
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[$position] = $parts[$idx];
        // Advance past the word itself and the text that follows it.
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // Any other format: only the number of words.
    return (int)(($total - 1) / 2);
  }
6953
6954
  /**
6955
   * Case-insensitive string comparison.
6956
   *
6957
   * INFO: Case-insensitive version of UTF8::strcmp()
6958
   *
6959
   * @param string $str1
6960
   * @param string $str2
6961
   *
6962
   * @return int
6963
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6964
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6965
   *             <strong>0</strong> if they are equal.
6966
   */
6967 19
  public static function strcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
6971
6972
  /**
6973
   * alias for "UTF8::strstr()"
6974
   *
6975
   * @see UTF8::strstr()
6976
   *
6977
   * @param string $haystack
6978
   * @param string $needle
6979
   * @param bool   $before_needle
6980
   * @param string $encoding
6981
   * @param bool   $cleanUtf8
6982
   *
6983
   * @return string|false
6984
   */
6985 1
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are handed to strstr() unchanged.
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
6989
6990
  /**
6991
   * Case-sensitive string comparison.
6992
   *
6993
   * @param string $str1
6994
   * @param string $str2
6995
   *
6996
   * @return int
6997
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
6998
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
6999
   *              <strong>0</strong> if they are equal.
7000
   */
7001 22
  public static function strcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no normalization needed.
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalizer::normalize() returns false when it cannot process the input.
    // Passing that false on to \strcmp() raises a TypeError under strict_types,
    // so compare the raw string instead.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7009
7010
  /**
7011
   * Find length of initial segment not matching mask.
7012
   *
7013
   * @param string $str
7014
   * @param string $charList
7015
   * @param int    $offset
7016
   * @param int    $length
7017
   *
7018
   * @return int|null
7019
   */
7020 15
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Narrow the subject down to the requested window first.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character from the mask.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // No mask character present: the whole (windowed) string counts.
    return self::strlen($str);
  }
7044
7045
  /**
7046
   * alias for "UTF8::stristr()"
7047
   *
7048
   * @see UTF8::stristr()
7049
   *
7050
   * @param string $haystack
7051
   * @param string $needle
7052
   * @param bool   $before_needle
7053
   * @param string $encoding
7054
   * @param bool   $cleanUtf8
7055
   *
7056
   * @return string|false
7057
   */
7058 1
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are handed to stristr() unchanged.
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7062
7063
  /**
7064
   * Create a UTF-8 string from code points.
7065
   *
7066
   * INFO: opposite to UTF8::codepoints()
7067
   *
7068
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
7069
   *
7070
   * @return string UTF-8 encoded string.
7071
   */
7072 2
  public static function string(array $array): string
  {
    // Convert each code point via UTF8::chr() ...
    $chars = \array_map([self::class, 'chr'], $array);

    // ... and glue the resulting characters together.
    return \implode('', $chars);
  }
7085
7086
  /**
7087
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
7088
   *
7089
   * @param string $str <p>The input string.</p>
7090
   *
7091
   * @return bool
7092
   *              <strong>true</strong> if the string has BOM at the start,<br>
7093
   *              <strong>false</strong> otherwise.
7094
   */
7095 3
  public static function string_has_bom(string $str): bool
  {
    // Only the keys of the BOM table (the BOM byte sequences) matter here;
    // the mapped byte lengths are not needed.
    foreach (\array_keys(self::$BOM) as $bom) {
      // Position 0 means the string starts with this BOM.
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7105
7106
  /**
7107
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
7108
   *
7109
   * @link http://php.net/manual/en/function.strip-tags.php
7110
   *
7111
   * @param string $str             <p>
7112
   *                                The input string.
7113
   *                                </p>
7114
   * @param string $allowable_tags  [optional] <p>
7115
   *                                You can use the optional second parameter to specify tags which should
7116
   *                                not be stripped.
7117
   *                                </p>
7118
   *                                <p>
7119
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
7120
   *                                can not be changed with allowable_tags.
7121
   *                                </p>
7122
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
7123
   *
7124
   * @return string The stripped string.
7125
   */
7126 2
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // Optionally run the input through clean() before stripping.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7138
7139
  /**
7140
   * Strip all whitespace characters. This includes tabs and newline
7141
   * characters, as well as multibyte whitespace such as the thin space
7142
   * and ideographic space.
7143
   *
7144
   * @param string $str
7145
   *
7146
   * @return string
7147
   */
7148 24
  public static function strip_whitespace(string $str): string
  {
    // Short-circuit for the empty string; otherwise delete every run of
    // [[:space:]] characters (matched in UTF-8 mode).
    return $str === ''
        ? ''
        : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
7156
7157
  /**
7158
   * Finds position of first occurrence of a string within another, case insensitive.
7159
   *
7160
   * @link http://php.net/manual/en/function.mb-stripos.php
7161
   *
7162
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7163
   * @param string $needle    <p>The string to find in haystack.</p>
7164
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
7165
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
7166
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7167
   *
7168
   * @return int|false
7169
   *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
7170
   *                   haystack string,<br> or <strong>false</strong> if needle is not found.
7171
   */
7172 71
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty operands can never match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // The two most common encodings skip the normalization call.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings than UTF-8.
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;

    return $useIntl
        ? \grapheme_stripos($haystack, $needle, $offset)
        // fallback to "mb_"-function via polyfill
        : \mb_stripos($haystack, $needle, $offset, $encoding);
  }
7204
7205
  /**
7206
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
7207
   *
7208
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
7209
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
7210
   * @param bool   $before_needle  [optional] <p>
7211
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
7212
   *                               haystack before the first occurrence of the needle (excluding the needle).
7213
   *                               </p>
7214
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
7215
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7216
   *
7217
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
7218
   */
7219 19
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty operands can never match.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a plain truthiness test would also treat the needle "0" as empty and
    // hand back the whole haystack instead of searching for it.
    if ('' === $needle) {
      return $haystack;
    }

    // Make sure the extension-support table has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Preferred implementation: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Pure ASCII input can use the native byte-wise function.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // Everything from the start of the match up to the end of the haystack.
    return self::substr($haystack, self::strlen($match[1]));
  }
7280
7281
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte charsets: one byte is one character
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if (
          $encoding === 'CP850'
          &&
          self::$SUPPORT['mbstring_func_overload'] === false
      ) {
        return \strlen($str);
      }

      return (int)\mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // non UTF-8 charset without mbstring: warn if iconv is missing as well, otherwise try iconv first
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return (int)\grapheme_strlen($str);
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return (int)\mb_strlen($str, $encoding);
    }

    // fallback via vanilla php: let PCRE split the string and count the pieces
    \preg_match_all('/./us', $str, $parts);
    $length = \count($parts[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return (int)\mb_strlen($str, $encoding);
  }
7392
7393
  /**
   * Get the length of a string in bytes.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int The number of bytes in $str.
   */
  public static function strlen_in_byte(string $str): int
  {
    // NOTE(review): presumably "\strlen()" no longer counts bytes when "mbstring.func_overload"
    // is active, hence the detour via an 8-bit charset - confirm against checkForSupport().
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850') // 8-BIT
        : \strlen($str);
  }
7410
7411
  /**
   * Case-insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // case-fold both sides first, then compare them case-sensitively
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
7428
7429
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7448
7449
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // case-fold both sides first, then delegate to the case-sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
7467
7468
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // cut both strings down to their first $len characters, then compare the pieces
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7489
7490
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found.
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ('' === $haystack || '' === $char_list) {
      return false;
    }

    $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit);
    if (!$found) {
      return false;
    }

    // byte offset of the matched text; the remainder of the haystack is cut byte-wise from there
    $byteOffset = (int)\strpos($haystack, $hit[0]);

    return \substr($haystack, $byteOffset);
  }
7512
7513
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
   *                              A negative offset counts from the end of the haystack.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 8-bit charset: the byte position is the character position
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    // Turn a "count from the end" offset into an absolute character position first,
    // otherwise the result below would be relative to the shortened haystack.
    if ($offset < 0) {
      $offset += self::strlen($haystack);
      if ($offset < 0) {
        // offset reaches before the start of the string: search everything
        $offset = 0;
      }
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position inside the shortened haystack ...
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted back by the skipped characters
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
7662
7663
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions,
      // so both strings are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_"-function, native or via polyfill
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
7699
7700
  /**
   * Reverses characters order in the string.
   *
   * INFO: the string is reversed per character (code point), so a base character and a following
   *       combining mark swap places.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Fast path: let PCRE split the string into characters in a single pass.
    // This avoids one substr() call per character (quadratic on long strings).
    if (\preg_match_all('/./us', $str, $chars) > 0) {
      return \implode('', \array_reverse($chars[0]));
    }

    // Fallback if PCRE rejects the input (e.g. invalid UTF-8): walk backwards one character at a time.
    $reversed = '';
    $i = self::strlen($str);
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
7721
7722
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions,
      // so both strings are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
7757
7758
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle, so a code point becomes its character
    $needle = ((int)$needle === $needle && $needle >= 0)
        ? (string)self::chr($needle)
        : (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback for ascii only
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \strripos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
7834
7835
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <strong>null</strong> is treated as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    // The default is null, but the functions called below declare an int offset; with
    // "declare(strict_types=1)" passing null to them raises a TypeError. Normalize once.
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback for ascii only
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \strrpos($haystack, $needle, $offset);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters; they are added back to the result below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut $offset characters off the end, the result stays relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
7937
7938
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start position; passed on to UTF8::substr().</p>
   * @param int|null $length [optional] <p>Length of the examined part; passed on to UTF8::substr().</p>
   *
   * @return int The number of leading characters of the examined part that are contained in the mask.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // only cut the string if the caller asked for a sub-range
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // match the longest run of mask characters anchored at the start
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
7965
7966
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions,
      // so both strings are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything before the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    // otherwise skip as many characters as the prefix contains
    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
8034
8035
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally replace full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The case-folded, lower-cased string.
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists are built once per request and then reused
    static $COMMON_FOLD_SEARCH = null;
    static $COMMON_FOLD_REPLACE = null;
    static $FULL_CASE_FOLD = null;

    if (null === $COMMON_FOLD_SEARCH) {
      $COMMON_FOLD_SEARCH = \array_keys(self::$COMMON_CASE_FOLD);
      $COMMON_FOLD_REPLACE = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($COMMON_FOLD_SEARCH, $COMMON_FOLD_REPLACE, $str);

    if ($full) {
      if (null === $FULL_CASE_FOLD) {
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
      }

      // index 0 holds the search list, index 1 the matching replacements
      $str = (string)\str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
8082
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the string is handed to intl / mbstring
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator
          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);

        // "transliterator_transliterate()" returns false on failure, returning that
        // value would violate the declared "string" return type
        if (\is_string($transliterated)) {
          return $transliterated;
        }

        // transliteration failed: fall through to the mbstring result below

      } else {

        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      }
    }

    return \mb_strtolower($str, $encoding);
  }
8137
8138
  /**
   * Generic transformation for collation matching: the string is decomposed (NFD)
   * and all non-spacing marks (\p{Mn}, e.g. combining accents) are removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The string without non-spacing marks, or the unchanged input if it
   *                can't be normalized.</p>
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // "Normalizer::normalize()" returns false on failure,
    // in this case there is nothing we can fold
    if (!\is_string($decomposed)) {
      return $str;
    }

    // "preg_replace()" returns null on error, keep the decomposed string then
    $stripped = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    return $stripped ?? $decomposed;
  }
8150
8151
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the string is handed to intl / mbstring
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);

        // "transliterator_transliterate()" returns false on failure, returning that
        // value would violate the declared "string" return type
        if (\is_string($transliterated)) {
          return $transliterated;
        }

        // transliteration failed: fall through to the mbstring result below

      } else {

        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      }
    }

    return \mb_strtoupper($str, $encoding);
  }
8206
8207
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters to replace, or (if "$to" is omitted) an array of
   *                              replace pairs for the native "strtr()".</p>
   * @param string|string[] $to   <p>The replacement characters.</p>
   *
   * @return string
   *                A copy of str, in which each character of "$from" is replaced by the character at the
   *                same position in "$to" (the longer one is cut to the length of the shorter one).
   *                If "$to" is omitted and "$from" is a string, all occurrences of "$from" are removed.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // nothing would change
    if ($from === $to) {
      return $str;
    }

    // only "$from" was given
    if (INF === $to) {
      return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    // only positions that exist on both sides can be paired
    $pairCount = \min(\count($fromChars), \count($toChars));

    $replacePairs = \array_combine(
        \array_slice($fromChars, 0, $pairCount),
        \array_slice($toChars, 0, $pairCount)
    );

    return \strtr($str, $replacePairs);
  }
8251
8252
  /**
   * Return the width of a string, as reported by "mb_strwidth()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // "mb_strwidth()" is provided by a polyfill if mbstring is missing
    return \mb_strwidth($input, $encoding);
  }
8276
8277
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string   $str       <p>The string being checked.</p>
   * @param int      $offset    <p>The first position used in str.</p>
   * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $str) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the character count is only needed if an offset was given:
    // "$length === null" implies "$offset !== 0" at this point
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: the offset points directly behind the last character
    // INFO: "$length" is null or a non-zero integer here, so the strict check is exact
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut byte-wise via the native function
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the charset explicitly, otherwise iconv falls back to its configured default encoding
      $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback for ascii only
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via symfony polyfill
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
8401
8402
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, passed to
   *                                     UTF8::substr() together with the length.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. If an offset or a length
   *                                     is given, str2 is cut to the length of the extracted part of
   *                                     str1.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    // without offset and length the strings are compared as they are
    $cutRequested = !($offset === 0 && $length === null);

    if ($cutRequested) {
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : $mainPart;

      // the secondary string must not be longer than the extracted part
      $secondaryPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($secondaryPart === false) ? '' : $secondaryPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
8445
8446
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. Default: the length of the haystack.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false This functions returns an integer or false if there isn't a string.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure with the same encoding that is used for cutting the haystack below
        $length = (int)self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars from both strings, so that they are matched on valid input
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // the needle can be empty after the cleaning and
      // "mb_substr_count()" can't handle an empty needle
      if ('' === $needle) {
        return false;
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
8535
8536
  /**
   * Returns the number of occurrences of $substring in the given string.
   * By default, the comparison is case-sensitive, but can be made insensitive
   * by setting $caseSensitive to false.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>The number of occurrences, a "false" from UTF8::substr_count() is returned as 0.</p>
   */
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    $haystack = $str;
    $needle = $substring;

    if ($caseSensitive === false) {
      // case-insensitive: compare the upper-cased variants of both strings
      $haystack = self::strtoupper($str, $encoding);
      $needle = self::strtoupper($substring, $encoding);
    }

    $occurrences = self::substr_count($haystack, $needle, 0, null, $encoding);

    return (int)$occurrences;
  }
8557
8558
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The string without the prefix, or the unchanged string if it doesn't start with it.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    // nothing to search in, or nothing to remove
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
8586
8587
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The string without the suffix, or the unchanged string if it doesn't end with it.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    // nothing to search in, or nothing to remove
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
8615
8616
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The string without the prefix, or the unchanged string if it doesn't start with it.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    // nothing to search in, or nothing to remove
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
8644
8645
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          <br><br>
   *                                          An array is only supported if the input is an array, too.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, the replacing
   *                                          ends at the end of string. Of course, if length is zero then this
   *                                          function will have the effect of inserting replacement into string at
   *                                          the given start offset. An array is only supported if the input is an
   *                                          array, too.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = \is_int($valueTmp) ? $valueTmp : 0;
        }
        unset($valueTmp);

        // a missing offset would reach the recursive call as "null", use 0 for it
        $offset = \array_pad($offset, $num, 0);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = \is_int($valueTmpV2) ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call (one call per input string)
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string can only take a single replacement: use the first one
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // neither "\substr_replace()" nor "\array_splice()" can work
    // with an array of offsets / lengths for a single string
    if (\is_array($offset) === true || \is_array($length) === true) {
      \trigger_error('UTF8::substr_replace() cannot handle an array as "offset" or "length" for a single string', E_USER_WARNING);

      return $str;
    }

    $offset = (int)$offset;
    if ($length !== null) {
      $length = (int)$length;
    }

    // ascii only: bytes are characters, so the native function can be used
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the character lists
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
8746
8747
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The string without the suffix, or the unchanged string if it doesn't end with it.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    // nothing to search in, or nothing to remove
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : $remainder;
  }
8775
8776
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the case mapping
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // fast path: "lower ^ upper ^ str" leaves, byte by byte, the variant that differs from
    // the input. This needs all three strings to line up, so it is only used if their
    // byte lengths are equal.
    $byteLength = \strlen($str);
    if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
      return (string)($lower ^ $upper ^ $str);
    }

    // slow path: a case mapping changed the byte length (e.g. "ı" => "I"),
    // so the case is swapped character by character
    $chars = false;
    if ($encoding === 'UTF-8') {
      $chars = \preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    }

    if ($chars === false) {
      // other encodings, or "$str" is not valid UTF-8
      $chars = [];
      $charCount = (int)\mb_strlen($str, $encoding);
      for ($i = 0; $i < $charCount; ++$i) {
        $chars[] = \mb_substr($str, $i, 1, $encoding);
      }
    }

    $swapped = '';
    foreach ($chars as $char) {
      $charLower = \mb_strtolower($char, $encoding);

      // already lowercase (or without case) => uppercase, otherwise => lowercase
      $swapped .= ($char === $charLower) ? \mb_strtoupper($char, $encoding) : $charLower;
    }

    return $swapped;
  }
8803
8804
  /**
   * Checks whether "mbstring" or "iconv" functions are provided by a polyfill,
   * i.e. the extension is not loaded but its function exists anyway.
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" exists without the "mbstring" extension or
   *              "iconv()" exists without the "iconv" extension, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringViaPolyfill = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvViaPolyfill = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringViaPolyfill || $iconvViaPolyfill;
  }
8827
8828
  /**
   * Replaces each tab in the string with "$tabLength" spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
8838
8839
  /**
   * Converts the string to title case via "mb_convert_case()" (MB_CASE_TITLE).
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_convert_case()" is provided by a polyfill if mbstring is missing
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
8857
8858
  /**
   * Deprecated camelCase alias that forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "unknown character" replacement.</p>
   * @param bool   $strict    <p>Passed on as the "strict" flag.</p>
   *
   * @return string The result of "UTF8::to_ascii()".
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
8875
8876
  /**
   * Deprecated camelCase alias that forwards its argument to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_iso8859()".
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
8891
8892
  /**
   * Deprecated camelCase alias that forwards its argument to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_latin1()".
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
8907
8908
  /**
   * Deprecated camelCase alias that forwards its argument to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_utf8()".
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
8923
8924
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned unchanged. Everything else is cleaned via "UTF8::clean()" and then
   * transliterated character by character with lookup tables that are loaded on demand; characters
   * without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache of the transliteration tables, keyed by "bank" (= code point >> 8)
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single (multi-byte) characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // Forget the code point of the previous character: a lead byte that matches none of the
      // ranges below must end up as $unknown instead of silently reusing a stale value.
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // decode the byte sequence into its code point, depending on the lead byte
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // no code point could be decoded (this includes the lead bytes 254 and 255)
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the table ("bank"), the lowest 8 bits the entry within it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the miss as an empty table, so the lookup is not repeated
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      // debugging hint: dump $c, $ord, $bank and $newchar here to track down missing table entries
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the by-reference loop variable
    unset($c);

    return \implode('', $chars);
  }
9085
9086
  /**
   * Interprets a value as boolean.
   *
   * The value is cast to string first. The empty string is false; "true", "1", "on", "yes" are true
   * and "false", "0", "off", "no" are false (case-insensitive); any other numeric string is true
   * only if it is greater than zero; everything else is cast to bool after "UTF8::trim()".
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $str = (string)$str;
    if ($str === '') {
      return false;
    }

    $normalized = \strtolower($str);

    // Info: http://php.net/manual/en/filter.filters.validate.php
    if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($str)) {
      return (float)$str > 0;
    }

    return (bool)self::trim($str);
  }
9125
9126
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), their keys are kept.
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The converted string, or the array with every element converted.
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $string = (string)$str;

    return $string === '' ? '' : self::utf8_decode($string);
  }
9150
9151
  /**
   * Alias that forwards its argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] The result of "UTF8::to_iso8859()".
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9164
9165
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, keys are kept
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $max; $pos++) {
      $lead = $str[$pos];

      // 0x00-0x7F: it doesn't need conversion
      if ($lead < "\x80") {
        $buf .= $lead;
        continue;
      }

      // how many bytes would a UTF-8 sequence starting with this byte have?
      if ($lead >= "\xC0" && $lead <= "\xDF") {
        $sequenceLength = 2;
      } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
        $sequenceLength = 3;
      } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
        $sequenceLength = 4;
      } else {
        // 0x80-0xBF (stray continuation byte) or 0xF8-0xFF: doesn't look like UTF8, convert it
        $buf .= self::to_utf8_convert_helper($lead);
        continue;
      }

      // collect the following bytes, as long as they are continuation bytes (0x80-0xBF);
      // a byte beyond the end of the string counts as "\x00" and therefore never matches
      $tail = '';
      $looksLikeUtf8 = true;
      for ($offset = 1; $offset < $sequenceLength; $offset++) {
        $next = $pos + $offset >= $max ? "\x00" : $str[$pos + $offset];
        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
          break;
        }
        $tail .= $next;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its tail
        $buf .= $lead . $tail;
        $pos += $sequenceLength - 1;
      } else {
        // not valid UTF8 - convert only the lead byte, the following bytes get their own turn
        $buf .= self::to_utf8_convert_helper($lead);
      }
    }

    // decode unicode escape sequences
    $buf = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function (array $match) {
          return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
9273
9274
  /**
   * Converts a single byte that is not part of a valid UTF-8 sequence into UTF-8.
   *
   * The byte is first looked up in the "win1252_to_utf8" table; if it has no entry there, it is
   * encoded as a two-byte UTF-8 sequence built from its own bits.
   *
   * @param int|string $input <p>The byte to convert (it is used as key of the "ord" table).</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Two-byte sequence: the bits above the lowest six go into the lead byte (110xxxxx), the
    // lowest six bits into the continuation byte (10xxxxxx). The shift keeps the array offset an
    // integer; "$ordC1 / 64" yields a float for most values, and using a float with a fractional
    // part as array offset is deprecated as of PHP 8.1.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
9307
9308
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped. If it is left at its default (INF)
   *                      or is empty, characters of the unicode categories "separator" (\pZ) and "other" (\pC)
   *                      are stripped.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a legitimate character list, although it is falsy in PHP: without the explicit check
    // a call like trim('0100', '0') would strip whitespace instead of zeros.
    $hasCharList = $chars !== INF && ($chars === '0' || (bool)$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCharList === false) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // the characters end up inside a character class, so they must be escaped
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
9337
9338
  /**
   * Makes string's first char uppercase, the rest of the string is left as it is.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything behind the first character is kept untouched
    $rest = self::substr($str, 1, null, $encoding);
    $rest = ($rest === false) ? '' : $rest;

    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
9368
9369
  /**
   * Alias that forwards all arguments to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Passed on as charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on as "clean" flag.</p>
   *
   * @return string The result of "UTF8::ucfirst()".
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
9384
9385
  /**
   * Uppercase for all words in the string.
   *
   * INFO: unlike "mb_convert_case($str, MB_CASE_TITLE)" this only uppercases the first letter of each
   * word, it does not lowercase the other letters.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against the empty string: a truthiness check would also swallow the string "0"
    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the native function is only usable if neither a charlist nor any exception is given
    // (again compared against '', so that a charlist / exception of "0" is not ignored)
    $usePhpDefaultFunctions = ($charlist === '' && \implode('', $exceptions) === '');

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only, the word "0" must survive
      if ((string)$word === '') {
        continue;
      }

      // words listed as exception are kept as they are (with no exceptions, nothing matches)
      if (!\in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
9453
9454
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html-entities, which are decoded below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one decoding pass at least; with $multi_decode the passes are repeated until nothing changes
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
9502
9503
  /**
   * Return a array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-encoded bytes "%20" - "%FF", the values the matching characters as UTF-8.
   *
   * NOTE(review): invisible characters (DEL, no-break space, soft hyphen and the five bytes that are
   * undefined in Windows-1252) are written as explicit escape sequences here; the undefined bytes are
   * mapped to the C1 control character with the same number - please confirm that this matches
   * the intended table.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DEL
        '%80' => '€', // 0x80 is the euro sign in Windows-1252 (not a backtick)
        '%81' => "\u{0081}", // undefined in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => "\u{008D}", // undefined in Windows-1252
        '%8E' => 'Ž',
        '%8F' => "\u{008F}", // undefined in Windows-1252
        '%90' => "\u{0090}", // undefined in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => "\u{009D}", // undefined in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\u{00A0}", // no-break space
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\u{00AD}", // soft hyphen
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
9739
9740
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is first run through the "win1252_to_utf8" replacement table and is
   * then reduced byte-wise: two-byte sequences whose code point is below 256 become
   * a single byte, every other multi-byte sequence becomes "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the string (as it is after the table replacement) is returned
   *                              unchanged whenever the decoded result is not shorter than it,
   *                              measured via self::strlen().
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists are derived from the table once per request
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): this replaces table keys by table values, i.e. the same direction
    // that utf8_encode() uses - confirm against the "win1252_to_utf8" data file that
    // this is the intended direction for decoding.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparision
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read offset, $j the write offset; the string is rewritten in place,
    // which is safe because $j never gets ahead of $i.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // the high nibble of the current byte selects the sequence length
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a two-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence at the end of the input: there is no continuation
            // byte to read, so emit the placeholder instead of reading past the end
            $str[$j] = $noCharFound;
            break;
          }

          // low 5 bits of the lead byte + low 6 bits of the continuation byte
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          // only code points below 256 fit into a single byte
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // four-byte sequence: skip one extra byte, then share the three-byte handling
          ++$i;
          // intentional fall-through
        case "\xE0":
          // three-byte (or longer) sequence: not representable, skip the remaining bytes
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // every other byte is copied as it is
          $str[$j] = $str[$i];
      }
    }

    // cut off the leftover bytes behind the write offset
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
9821
9822
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The result of the native utf8_encode() is additionally passed through the
   * "win1252_to_utf8" replacement table whenever it contains a "\xC2" byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the encoding step failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfill implementation may hand back false
    if ($encoded === false) {
      return '';
    }

    // the table replacement is only attempted when a "\xC2" byte is present
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are derived from the table once per request
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
9861
9862
  /**
   * Fixes utf8-win1252 chars; kept only as an alias that forwards to fix_simple_utf8().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever UTF8::fix_simple_utf8() returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
9875
9876
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return string[]
   *                 The whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
9891
9892
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Appended to the result when the string had to be shortened.</p>
   *
   * @return string <p>The unchanged string if it already fits, otherwise the shortened string plus $strAddOn.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    $head = $matches[0] ?? null;

    // no match at all, or the match already covers the whole string: nothing to cut
    if ($head === null || self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
9923
9924
  /**
   * Wraps a string to a given number of characters
   *
   * The string is split into characters via self::split() and mirrored into an
   * equally long single-byte mask (space stays space, every other character becomes "?",
   * every existing break becomes "#"). PHP's native wordwrap() then works on that mask
   * and its break positions are mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string The given string wrapped at the specified column; an empty string
   *                if $str or $break is empty.
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = \explode($break, $str);
    if ($segments === false) {
      $segments = [];
    }

    // $chars holds the real characters, $mask the single-byte stand-in for each of them
    $chars = [];
    $mask = '';
    foreach ($segments as $segmentIndex => $segment) {
      // re-insert the break that explode() removed between two segments
      if ($segmentIndex) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $markerPos = -1;
    $mask = \wordwrap($mask, $width, '#', $cut);

    // walk from one "#" marker to the next and copy the characters in between
    while (false !== $markerPos = self::strpos($mask, '#', $markerPos + 1)) {
      for (++$maskIndex; $maskIndex < $markerPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the character sitting at the break position (old break or space) is dropped
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left behind the last marker is appended unchanged
    return $wrapped . \implode('', $chars);
  }
9994
9995
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * The input is split on "\r\n", "\r" and "\n"; every resulting line is wrapped on its
   * own with the UTF-8 aware self::wordwrap() and terminated with "\n" (including the
   * last one).
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width that is passed to self::wordwrap().</p>
   *
   * @return string <p>The wrapped lines, each followed by "\n".</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      // splitting failed: there is nothing to wrap
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // self::wordwrap() counts characters; the unqualified wordwrap() would resolve to
      // PHP's global function, which counts bytes and breaks multibyte text too early
      $wrapped .= self::wordwrap($line, $limit);
      $wrapped .= "\n";
    }

    return $wrapped;
  }
10019
10020
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return string[] The White Space characters as values, keyed by their numeric code point.
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
10029
10030
}
10031