Passed
Push — master ( 5a8437...94b019 )
by Lars
04:13
created

UTF8::str_pad()   B

Complexity

Conditions 11
Paths 22

Size

Total Lines 56
Code Lines 38

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 33
CRAP Score 11.003

Importance

Changes 0
Metric Value
cc 11
eloc 38
nc 22
nop 5
dl 0
loc 56
ccs 33
cts 34
cp 0.9706
crap 11.003
rs 7.3166
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Creates an instance and triggers the (one-time) environment detection
   * performed by UTF8::checkForSupport().
   */
  public function __construct()
  {
    // the detection itself is a no-op once it has already run
    self::checkForSupport();
  }
212
213
  /**
   * Returns the character found at the given position ($str[1]-like access).
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single multi-byte character, or an empty string for an empty
   *                input or a negative position.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // an empty input and a negative position both yield an empty result
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Returns the string with a leading UTF-8 BOM.
   *
   * INFO: A string that already has a BOM (as reported by UTF8::string_has_bom())
   *       is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains a BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    // already marked: hand the input back untouched
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The work is delegated to UTF8::str_pad() with a target length of
   * "$left + $right + current length" and a pad type derived from which
   * sides were requested.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with padding applied.</p>
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // NOTE: $encoding now has a default; a required parameter after optional ones
    // is deprecated since PHP 8.0 and silently makes the earlier defaults unusable.
    $strlen = self::strlen($str, $encoding);

    // a side that was not requested contributes 0, so a single sum covers every case
    $length = ($left + $right) + $strlen;

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or none at all (then the length is unchanged anyway)
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
   * Changes the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default). Any other value is
   *                     treated like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array <p>An array with its keys lower or uppercased; the values are kept as they are.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything that is not explicitly CASE_UPPER falls back to lower-casing
    $toUpper = ($case === CASE_UPPER);

    $return = [];
    foreach ($array as $key => $value) {
      $newKey = $toUpper ? self::strtoupper($key) : self::strtolower($key);

      // keys that collide after the conversion overwrite earlier entries
      $return[$newKey] = $value;
    }

    return $return;
  }
317
318
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * An empty string is also returned when $end directly follows $start,
   * i.e. when there is nothing in between.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $posStart = self::strpos($str, $start, $offset, $encoding);
    if ($posStart === false) {
      return '';
    }

    // first character after the start delimiter
    $substrIndex = $posStart + self::strlen($start, $encoding);
    $posEnd = self::strpos($str, $end, $substrIndex, $encoding);
    if (
        $posEnd === false
        ||
        $posEnd === $substrIndex
    ) {
      return '';
    }

    // self::substr() may report a failure as false; with strict_types that would
    // violate the declared string return type, so the result is cast explicitly
    // (false becomes an empty string).
    return (string)self::substr($str, $substrIndex, $posEnd - $substrIndex, $encoding);
  }
350
351
  /**
   * Converts a binary representation ("0" / "1" digits) into a string.
   *
   * The digits are converted to hexadecimal via base_convert() and then
   * packed into bytes via pack('H*').
   *
   * NOTE(review): base_convert() is documented to lose precision on large
   * numbers, so long inputs are presumably not converted exactly - confirm.
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>Empty string if the input has no first offset or converts to "0".</p>
   */
  public static function binary_to_str($bin): string
  {
    // no first offset => nothing to convert
    if (isset($bin[0]) === false) {
      return '';
    }

    $hex = \base_convert($bin, 2, 16);

    return $hex === '0' ? '' : \pack('H*', $hex);
  }
371
372
  /**
   * Returns the three bytes of the UTF-8 Byte Order Mark (EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
383
384
  /**
   * Alias of UTF8::chr_map(): applies a callback to every character of a string.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
398
399
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The character at $index, or an empty string if no character
   *                could be extracted.</p>
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // self::substr() may report a failure as false; with strict_types that would
    // violate the declared string return type, so the result is cast explicitly.
    return (string)self::substr($str, $index, 1, $encoding);
  }
412
413
  /**
   * Splits the string into its characters (UTF8::str_split() with a chunk length of 1).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] <p>An array of chars.</p>
   */
  public static function chars(string $str): array
  {
    $singleChars = self::str_split($str, 1);

    return $singleChars;
  }
424
425
  /**
   * Detects which UTF-8 related extensions / features the server provides and
   * stores the results in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       The detection only runs once; later calls return immediately.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $intlLoaded = self::intl_loaded();
    self::$SUPPORT['intl'] = $intlLoaded;

    // the transliterator ids are only collected if intl offers the function
    $canListIds = $intlLoaded === true && \function_exists('transliterator_list_ids') === true;
    /** @noinspection PhpComposerExtensionStubsInspection */
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListIds ? \transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
471
472
  /**
473
   * Generates a UTF-8 encoded character from the given code point.
474
   *
475
   * INFO: opposite to UTF8::ord()
476
   *
477
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
478
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
479
   *
480
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
481
   */
482 10
  public static function chr($code_point, string $encoding = 'UTF-8')
483
  {
484
    // init
485 10
    static $CHAR_CACHE = [];
486
487 10
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
488
      self::checkForSupport();
489
    }
490
491 10
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
492 2
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
493
    }
494
495
    if (
496 10
        $encoding !== 'UTF-8'
497
        &&
498 10
        $encoding !== 'ISO-8859-1'
499
        &&
500 10
        $encoding !== 'WINDOWS-1252'
501
        &&
502 10
        self::$SUPPORT['mbstring'] === false
503
    ) {
504
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
505
    }
506
507 10
    $cacheKey = $code_point . $encoding;
508 10
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
509 8
      return $CHAR_CACHE[$cacheKey];
510
    }
511
512 9
    if ($code_point <= 127) { // use "simple"-char only until "\x80"
513
514 7
      if (self::$CHR === null) {
515
        self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type integer or string or boolean. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
516
      }
517
518 7
      $chr = self::$CHR[$code_point];
519
520 7
      if ($encoding !== 'UTF-8') {
521 1
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
522
      }
523
524 7
      return $CHAR_CACHE[$cacheKey] = $chr;
525
    }
526
527 7
    if (self::$SUPPORT['intlChar'] === true) {
528
      /** @noinspection PhpComposerExtensionStubsInspection */
529 7
      $chr = \IntlChar::chr($code_point);
530
531 7
      if ($encoding !== 'UTF-8') {
532
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
533
      }
534
535 7
      return $CHAR_CACHE[$cacheKey] = $chr;
536
    }
537
538
    if (self::$CHR === null) {
539
      self::$CHR = self::getData('chr');
540
    }
541
542
    if ($code_point <= 0x7F) {
543
      $chr = self::$CHR[$code_point];
544
    } elseif ($code_point <= 0x7FF) {
545
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
546
             self::$CHR[($code_point & 0x3F) + 0x80];
547
    } elseif ($code_point <= 0xFFFF) {
548
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
549
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
550
             self::$CHR[($code_point & 0x3F) + 0x80];
551
    } else {
552
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
553
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
554
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
555
             self::$CHR[($code_point & 0x3F) + 0x80];
556
    }
557
558
    if ($encoding !== 'UTF-8') {
559
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
560
    }
561
562
    return $CHAR_CACHE[$cacheKey] = $chr;
563
  }
564
565
  /**
   * Runs the callback on every character of the string (array_map() over UTF8::split()).
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
579
580
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] <p>An array of byte lengths of each character.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    // self::$SUPPORT is filled lazily; without this guard the
    // 'mbstring_func_overload' index is undefined when this is the first call
    // into the class (same guard as used in UTF8::chr()).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // the native strlen() cannot be relied on for byte counts in this mode,
      // so the length is taken via UTF8::strlen() with an 8-bit encoding
      return \array_map(
          function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
611
612
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character; the payload
   * bits of the lead byte and of each following byte are then joined, six bits
   * per following byte.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    // 0xxxxxxx => single byte, the value is the code itself
    if (($code & 0x80) === 0) {
      return $code;
    }

    // a lead byte matching none of the patterns below keeps a length of 1
    $length = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $code &= ~0xf0;
    }

    // 10xxxxxx => append the bits of every following byte
    $offset = 1;
    while ($offset < $length) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
      ++$offset;
    }

    return $code;
  }
650
651
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix passed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.<p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // NOTE(review): the replacement literal is empty as seen here; the original
    // file may have contained a NUL byte at this place - confirm.
    if ('&#0;' === $char) {
      $char = '';
    }

    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
671
672
  /**
   * Alias of UTF8::chr_to_decimal(): returns the decimal code of a character.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
685
686
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * The chunks come from UTF8::split() and are glued together with implode(),
   * so $end is placed between the chunks only.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
699
700
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The optional steps run in this order: diamond question mark, invisible
   * characters, whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences and is kept by the replacement;
    // stray bytes only match the other groups and are therefore dropped
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($utf8Pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
768
769
  /**
   * Cleans up a string so that only printable UTF-8 chars remain + fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // remove all none UTF-8 symbols, with these flags for UTF8::clean():
    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (MS Word chars are left alone)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
802
803
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return int[]|string[] <p>The array of code points (the UTF8::int_to_hex() results
   *                        when $u_style is set).</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a plain string is first broken up into its characters
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
840
841
  /**
   * Replaces every run of "[[:space:]]" characters with a single space via
   * UTF8::regexReplace() and trims the result via UTF8::trim().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String with a trimmed $str and condensed whitespace.</p>
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regexReplace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
856
857
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
870
871
  /**
   * Remove css media-queries.
   *
   * @param string $str
   *
   * @return string <p>The input without the "@media ... { ... }" blocks matched by the pattern.</p>
   */
  public static function css_stripe_media_queries(string $str): string
  {
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
886
887
  /**
888
   * Checks whether ctype is available on the server.
889
   *
890
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
891
   */
892
  public static function ctype_loaded(): bool
  {
    // Ask the engine directly whether the "ctype" extension is active.
    $hasCtype = \extension_loaded('ctype');

    return $hasCtype;
  }
896
897
  /**
898
   * Converts an int value into a UTF-8 character.
899
   *
900
   * @param mixed $int
901
   *
902
   * @return string
903
   */
904 5
  public static function decimal_to_chr($int): string
  {
    // Wrap the value in a decimal numeric character reference ("&#<int>;") ...
    $numericEntity = '&#' . $int . ';';

    // ... and let the entity decoder turn that reference into the character.
    return self::html_entity_decode($numericEntity, ENT_QUOTES | ENT_HTML5);
  }
908
909
  /**
910
   * Encode a string with a new charset-encoding.
911
   *
912
   * INFO:  The difference from "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
913
   *        so you can also call this function on a UTF-8 string without corrupting it.
914
   *
915
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
916
   * @param string $str      <p>The input string</p>
917
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
918
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
919
   *
920
   * @return string
921
   */
922 14
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Nothing to convert, or no target charset given: hand the input back untouched.
    if ('' === $encoding || '' === $str) {
      return $str;
    }

    // "UTF-8" and "CP850" are used verbatim; every other name is normalized first.
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the feature-detection table (self::$SUPPORT) has been filled.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Without $force, a conversion only happens when a source encoding was
    // detected and it differs from the requested one.
    $mustConvert = $force === true
                   || ($encodingDetected !== false && $encodingDetected !== $encoding);

    if (!$mustConvert) {
      return $str;
    }

    // The dedicated UTF-8 / ISO-8859-1 helpers are used when the caller forces
    // the conversion or the detected source is one of these three encodings.
    $helperApplies = $force === true
                     || \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($helperApplies) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    // Any other target charset is handed to mb_convert_encoding(); warn when
    // the support table says mbstring is not available.
    $isCommonTarget = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);

    if (!$isCommonTarget && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // $encodingDetected cannot be false here: without $force this point is
    // only reached when a source encoding was detected.
    $sourceEncoding = ($force === true ? $encoding : $encodingDetected);

    $strEncoded = \mb_convert_encoding($str, $encoding, $sourceEncoding);

    // A falsy conversion result falls back to the unconverted input.
    return $strEncoded ?: $str;
  }
1002
1003
  /**
1004
   * Creates an extract from a sentence; if the search string is found, the extract tries to center it in the output.
1005
   *
1006
   * @param string   $str                    <p>The input string.</p>
1007
   * @param string   $search                 <p>The searched string.</p>
1008
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1009
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1010
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
1011
   *
1012
   * @return string
1013
   */
1014 1
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Characters stripped from the cut edge(s) of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // Default extract length: half of the input length.
    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // No search term: the extract is taken from the start of the string.
    if (empty($search)) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      // NOTE(review): if either lookup fails (false), min() yields a falsy
      // value, so a cut only happens when both ' ' and '.' occur after $end.
      $pos = \min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        // substr() may report failure with false; cast so rtrim() always gets a string.
        return \rtrim(
            (string)self::substr($str, 0, $pos, $encoding),
            $trimChars
        ) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos(
        $str,
        $search,
        0,
        $encoding
    );

    // Start offset of a window of $length characters centered on the match.
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    if ($halfSide > 0) {

      // Move the start back to the last ' ' or '.' before the window start.
      $halfText = (string)self::substr($str, 0, $halfSide, $encoding);
      $pos_start = \max(
          self::strrpos($halfText, ' ', 0, $encoding),
          self::strrpos($halfText, '.', 0, $encoding)
      );

      if (!$pos_start) {
        $pos_start = 0;
      }

    } else {
      $pos_start = 0;
    }

    if ($wordPos && $halfSide > 0) {
      // The match lies far enough inside the text: the extract starts mid-text,
      // so the replacer is always prepended.
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      ) - $pos_start;

      if (!$pos_end || $pos_end <= 0) {
        // No usable end position: take everything from $pos_start onwards.
        // The length is measured in $encoding, like every other call here.
        $extract = $replacerForSkippedText . \ltrim(
            (string)self::substr(
                $str,
                $pos_start,
                self::strlen($str, $encoding),
                $encoding
            ),
            $trimChars
        );
      } else {
        // Text is skipped on both sides of the extract.
        $extract = $replacerForSkippedText . \trim(
            (string)self::substr(
                $str,
                $pos_start,
                $pos_end,
                $encoding
            ),
            $trimChars
        ) . $replacerForSkippedText;
      }

    } else {

      // Match at/near the beginning (or not found): extract from the start.
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $extract = \rtrim(
            (string)self::substr($str, 0, $pos_end, $encoding),
            $trimChars
        ) . $replacerForSkippedText;
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1136
1137
  /**
1138
   * Reads entire file into a string.
1139
   *
1140
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1141
   *
1142
   * @link http://php.net/manual/en/function.file-get-contents.php
1143
   *
1144
   * @param string        $filename         <p>
1145
   *                                        Name of the file to read.
1146
   *                                        </p>
1147
   * @param bool          $use_include_path [optional] <p>
1148
   *                                        Prior to PHP 5, this parameter is called
1149
   *                                        use_include_path and is a bool.
1150
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1151
   *                                        to trigger include path
1152
   *                                        search.
1153
   *                                        </p>
1154
   * @param resource|null $context          [optional] <p>
1155
   *                                        A valid context resource created with
1156
   *                                        stream_context_create. If you don't need to use a
1157
   *                                        custom context, you can skip this parameter by &null;.
1158
   *                                        </p>
1159
   * @param int|null      $offset           [optional] <p>
1160
   *                                        The offset where the reading starts.
1161
   *                                        </p>
1162
   * @param int|null      $maxLength        [optional] <p>
1163
   *                                        Maximum length of data read. The default is to read until end
1164
   *                                        of file is reached.
1165
   *                                        </p>
1166
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1167
   *
1168
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1169
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1170
   *
1171
   * @return string|false <p>The function returns the read data or false on failure.</p>
1172
   */
1173 6
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // Run the file name through the string sanitizing filter before use.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // No context supplied: build one that only carries the HTTP timeout.
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // Pass the length argument only when the caller gave one.
    $data = \is_int($maxLength)
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // Propagate a read failure unchanged.
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Convert only data that is not flagged as binary, or that is UTF-16 / UTF-32.
    $shouldConvert = self::is_binary($data, true) !== true
                     || self::is_utf16($data) !== false
                     || self::is_utf32($data) !== false;

    if ($shouldConvert) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1228
1229
  /**
1230
   * Checks if a file starts with BOM (Byte Order Mark) character.
1231
   *
1232
   * @param string $file_path <p>Path to a valid file.</p>
1233
   *
1234
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1235
   */
1236 1
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // \file_get_contents() returns false when the file cannot be read.
    // Such a file is reported as "no BOM" instead of forwarding the
    // non-string value to string_has_bom().
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1240
1241
  /**
1242
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1243
   *
1244
   * @param mixed  $var
1245
   * @param int    $normalization_form
1246
   * @param string $leading_combining
1247
   *
1248
   * @return mixed
1249
   */
1250 35
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $var[$k] = self::filter($v, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if ($type === 'object') {
      foreach ($var as $k => $v) {
        $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string value passes through untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder marking "already normalized"; only its non-emptiness matters below.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized string when there is one; otherwise force a
      // re-encoding of the input to UTF-8.
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    if ($var[0] >= "\x80") {
      if (isset($n[0], $leading_combining[0])) {
        if (\preg_match('/^\p{Mn}/u', $var)) {
          // Prevent leading combining chars
          // for NFC-safe concatenations.
          $var = $leading_combining . $var;
        }
      }
    }

    return $var;
  }
1304
1305
  /**
1306
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1307
   *
1308
   * Gets a specific external variable by name and optionally filters it
1309
   *
1310
   * @link  http://php.net/manual/en/function.filter-input.php
1311
   *
1312
   * @param int    $type          <p>
1313
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1314
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1315
   *                              <b>INPUT_ENV</b>.
1316
   *                              </p>
1317
   * @param string $variable_name <p>
1318
   *                              Name of a variable to get.
1319
   *                              </p>
1320
   * @param int    $filter        [optional] <p>
1321
   *                              The ID of the filter to apply. The
1322
   *                              manual page lists the available filters.
1323
   *                              </p>
1324
   * @param mixed  $options       [optional] <p>
1325
   *                              Associative array of options or bitwise disjunction of flags. If filter
1326
   *                              accepts options, flags can be provided in "flags" field of array.
1327
   *                              </p>
1328
   *
1329
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1330
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1331
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1332
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1333
   * @since 5.2.0
1334
   */
1335
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a fourth argument.
    $var = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    // Run the fetched value through the class's own filter() before returning it.
    return self::filter($var);
  }
1345
1346
  /**
1347
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1348
   *
1349
   * Gets external variables and optionally filters them
1350
   *
1351
   * @link  http://php.net/manual/en/function.filter-input-array.php
1352
   *
1353
   * @param int   $type       <p>
1354
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1355
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1356
   *                          <b>INPUT_ENV</b>.
1357
   *                          </p>
1358
   * @param mixed $definition [optional] <p>
1359
   *                          An array defining the arguments. A valid key is a string
1360
   *                          containing a variable name and a valid value is either a filter type, or an array
1361
   *                          optionally specifying the filter, flags and options. If the value is an
1362
   *                          array, valid keys are filter which specifies the
1363
   *                          filter type,
1364
   *                          flags which specifies any flags that apply to the
1365
   *                          filter, and options which specifies any options that
1366
   *                          apply to the filter. See the example below for a better understanding.
1367
   *                          </p>
1368
   *                          <p>
1369
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1370
   *                          input array are filtered by this filter.
1371
   *                          </p>
1372
   * @param bool  $add_empty  [optional] <p>
1373
   *                          Add missing keys as <b>NULL</b> to the return value.
1374
   *                          </p>
1375
   *
1376
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1377
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1378
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1379
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1380
   * fails.
1381
   * @since 5.2.0
1382
   */
1383
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, use the native defaults; otherwise forward
    // the definition and the add-empty flag as well.
    $a = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    // Run the fetched values through the class's own filter() before returning them.
    return self::filter($a);
  }
1393
1394
  /**
1395
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1396
   *
1397
   * Filters a variable with a specified filter
1398
   *
1399
   * @link  http://php.net/manual/en/function.filter-var.php
1400
   *
1401
   * @param mixed $variable <p>
1402
   *                        Value to filter.
1403
   *                        </p>
1404
   * @param int   $filter   [optional] <p>
1405
   *                        The ID of the filter to apply. The
1406
   *                        manual page lists the available filters.
1407
   *                        </p>
1408
   * @param mixed $options  [optional] <p>
1409
   *                        Associative array of options or bitwise disjunction of flags. If filter
1410
   *                        accepts options, flags can be provided in "flags" field of array. For
1411
   *                        the "callback" filter, callable type should be passed. The
1412
   *                        callback must accept one argument, the value to be filtered, and return
1413
   *                        the value after filtering/sanitizing it.
1414
   *                        </p>
1415
   *                        <p>
1416
   *                        <code>
1417
   *                        // for filters that accept options, use this format
1418
   *                        $options = array(
1419
   *                        'options' => array(
1420
   *                        'default' => 3, // value to return if the filter fails
1421
   *                        // other options here
1422
   *                        'min_range' => 0
1423
   *                        ),
1424
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1425
   *                        );
1426
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1427
   *                        // for filter that only accept flags, you can pass them directly
1428
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1429
   *                        // for filter that only accept flags, you can also pass as an array
1430
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1431
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1432
   *                        // callback validate filter
1433
   *                        function foo($value)
1434
   *                        {
1435
   *                        // Expected format: Surname, GivenNames
1436
   *                        if (strpos($value, ", ") === false) return false;
1437
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1438
   *                        $empty = (empty($surname) || empty($givennames));
1439
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1440
   *                        if ($empty || $notstrings) {
1441
   *                        return false;
1442
   *                        } else {
1443
   *                        return $value;
1444
   *                        }
1445
   *                        }
1446
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1447
   *                        </code>
1448
   *                        </p>
1449
   *
1450
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1451
   * @since 5.2.0
1452
   */
1453 1
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a third argument.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    // Run the result through the class's own filter() before returning it.
    return self::filter($filtered);
  }
1463
1464
  /**
1465
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1466
   *
1467
   * Gets multiple variables and optionally filters them
1468
   *
1469
   * @link  http://php.net/manual/en/function.filter-var-array.php
1470
   *
1471
   * @param array $data       <p>
1472
   *                          An array with string keys containing the data to filter.
1473
   *                          </p>
1474
   * @param mixed $definition [optional] <p>
1475
   *                          An array defining the arguments. A valid key is a string
1476
   *                          containing a variable name and a valid value is either a
1477
   *                          filter type, or an
1478
   *                          array optionally specifying the filter, flags and options.
1479
   *                          If the value is an array, valid keys are filter
1480
   *                          which specifies the filter type,
1481
   *                          flags which specifies any flags that apply to the
1482
   *                          filter, and options which specifies any options that
1483
   *                          apply to the filter. See the example below for a better understanding.
1484
   *                          </p>
1485
   *                          <p>
1486
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1487
   *                          input array are filtered by this filter.
1488
   *                          </p>
1489
   * @param bool  $add_empty  [optional] <p>
1490
   *                          Add missing keys as <b>NULL</b> to the return value.
1491
   *                          </p>
1492
   *
1493
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1494
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1495
   * the variable is not set.
1496
   * @since 5.2.0
1497
   */
1498 1
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, use the native defaults; otherwise forward
    // the definition and the add-empty flag as well.
    $a = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    // Run the result through the class's own filter() before returning it.
    return self::filter($a);
  }
1508
1509
  /**
1510
   * Checks whether finfo is available on the server.
1511
   *
1512
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1513
   */
1514
  public static function finfo_loaded(): bool
  {
    // finfo support is detected by the presence of the "finfo" class.
    $hasFinfo = \class_exists('finfo');

    return $hasFinfo;
  }
1518
1519
  /**
1520
   * Returns the first $n characters of the string.
1521
   *
1522
   * @param string $str      <p>The input string.</p>
1523
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1524
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
1525
   *
1526
   * @return string
1527
   */
1528 12
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A non-positive count never yields any characters.
    if ($n <= 0) {
      return '';
    }

    // self::substr() may report failure with false, which would violate the
    // declared string return type; the cast maps that case to ''.
    return (string)self::substr($str, 0, $n, $encoding);
  }
1536
1537
  /**
1538
   * Check if the number of unicode characters are not more than the specified integer.
1539
   *
1540
   * @param string $str      The original string to be checked.
1541
   * @param int    $box_size The size in number of chars to be checked against string.
1542
   *
1543
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1544
   */
1545 1
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Character count as reported by the class's own strlen().
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1549
1550
  /**
1551
   * Try to fix simple broken UTF-8 strings.
1552
   *
1553
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1554
   *
1555
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1556
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1557
   * See: http://en.wikipedia.org/wiki/Windows-1252
1558
   *
1559
   * @param string $str <p>The input string</p>
1560
   *
1561
   * @return string
1562
   */
1563 30
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search / replacement lists, built once and kept for later calls.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      // Load the broken => fixed mapping on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $fixMap = self::$BROKEN_UTF8_FIX;

      // Keys are the broken sequences, values their replacements.
      $searchCache = \array_keys($fixMap);
      $replaceCache = \array_values($fixMap);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
1584
1585
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept).
   * A single value is decoded and re-encoded until the result no longer changes.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str)) {
      // array_map() with a single array keeps the original keys.
      return \array_map(
          function ($item) {
            return UTF8::fix_utf8($item);
          },
          $str
      );
    }

    // Repeat decode -> encode until a pass leaves the value untouched.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1613
1614
  /**
   * Get the text direction of a specific character.
   *
   * When "IntlChar" support is flagged in self::$SUPPORT, its direction code is used first;
   * if that code is neither a known LTR nor a known RTL code, or IntlChar is not available,
   * the code point is looked up in a built-in table of RTL code points / ranges.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      $ltrCodes = [0, 11, 12, 20];
      $rtlCodes = [1, 13, 14, 15, 21];

      if (\in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of [0x5be, 0x10b7f] is treated as LTR.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Table entries: an int is a single code point (strict comparison),
    // a two-element array is an inclusive [from, to] range.
    if (0x85e >= $c) {

      $rtlTable = [
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          0x608,
          0x60b,
          0x60d,
          0x61b,
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          0x710,
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          0x7b1,
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          0x7fa,
          [0x800, 0x815],
          0x81a,
          0x824,
          0x828,
          [0x830, 0x83e],
          [0x840, 0x858],
          0x85e,
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlTable = [
          0xfb1d,
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          0xfb3e,
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          0x10808,
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          0x1083c,
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          0x1093f,
          0x10a00,
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // Between the two tables (and not U+200F): nothing to look up.
      $rtlTable = [];

    }

    foreach ($rtlTable as $entry) {
      if (\is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1736
1737
  /**
   * Load a data file from "/data/*.php" (relative to this file's directory).
   *
   * The file is loaded via "require", so the return value is whatever the data file returns.
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false on error.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1754
1755
  /**
   * Check for php-support.
   *
   * Runs the support detection first if it has not been done yet.
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // No key: hand back the whole table. Unknown (or null-valued) key: null.
    return $key === null
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
1780
1781
  /**
   * Create a random string by picking characters from a pool of possible characters.
   *
   * An empty pool yields an empty string; a $length below 1 yields an empty string as well.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    for ($picked = 0; $picked < $length; ++$picked) {
      try {
        $index = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // random_int() failed: fall back to mt_rand()
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1814
1815
  /**
   * Create a unique string based on mt_rand(), the session-id, the remote- / server-address
   * (if available in $_SERVER), the extra entropy and uniqid().
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $remoteAddress = $_SERVER['REMOTE_ADDR'] ?? '';
    $serverAddress = $_SERVER['SERVER_ADDR'] ?? '';

    $seed = \mt_rand() . \session_id() . $remoteAddress . $serverAddress . $entropyExtra;

    // the seed is used as prefix, "true" adds more entropy to the result
    $unique = \uniqid($seed, true);

    if (!$md5) {
      return $unique;
    }

    return \md5($unique . $seed);
  }
1837
1838
  /**
   * Alias for "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1853
1854
  /**
   * Returns true if the string contains a lower case char, false otherwise.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a "[[:lower:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains a lower case character.</p>
   */
  public static function has_lowercase(string $str): bool
  {
    $lowerCasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowerCasePattern);
  }
1865
1866
  /**
   * Returns true if the string contains an upper case char, false otherwise.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with a "[[:upper:]]" pattern.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains an upper case character.</p>
   */
  public static function has_uppercase(string $str): bool
  {
    $upperCasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $upperCasePattern);
  }
1877
1878
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with hexdec() and then passed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1889
1890
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (case-insensitive), e.g. "U+00f1", "\u00f1" or "00f1". Anything else - including
   * strings with non-hexadecimal letters like "zzzz" - is a failure.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if ('' === $hexDec) {
      return false;
    }

    // Only real hex digits are allowed: "[a-z0-9]" would let e.g. "zzzz" through,
    // which intval(..., 16) silently turns into 0 instead of reporting a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1911
1912
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1927
1928
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" exists it does the work, otherwise every character
   * (via "UTF8::split()") is encoded with "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars only code points >= 0x80 are converted.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap is a list of 4-tuples [start, end, offset, mask]. A stray 5th
      // element is rejected with a ValueError by PHP 8, so exactly one tuple is passed.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode char by char.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1974
1975
  /**
1976
   * UTF-8 version of html_entity_decode()
1977
   *
1978
   * The reason we are not using html_entity_decode() by itself is because
1979
   * while it is not technically correct to leave out the semicolon
1980
   * at the end of an entity most browsers will still interpret the entity
1981
   * correctly. html_entity_decode() does not convert entities without
1982
   * semicolons, so we are left with our own little solution here. Bummer.
1983
   *
1984
   * Convert all HTML entities to their applicable characters
1985
   *
1986
   * INFO: opposite to UTF8::html_encode()
1987
   *
1988
   * @link http://php.net/manual/en/function.html-entity-decode.php
1989
   *
1990
   * @param string $str      <p>
1991
   *                         The input string.
1992
   *                         </p>
1993
   * @param int    $flags    [optional] <p>
1994
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1995
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1996
   *                         <table>
1997
   *                         Available <i>flags</i> constants
1998
   *                         <tr valign="top">
1999
   *                         <td>Constant Name</td>
2000
   *                         <td>Description</td>
2001
   *                         </tr>
2002
   *                         <tr valign="top">
2003
   *                         <td><b>ENT_COMPAT</b></td>
2004
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2005
   *                         </tr>
2006
   *                         <tr valign="top">
2007
   *                         <td><b>ENT_QUOTES</b></td>
2008
   *                         <td>Will convert both double and single quotes.</td>
2009
   *                         </tr>
2010
   *                         <tr valign="top">
2011
   *                         <td><b>ENT_NOQUOTES</b></td>
2012
   *                         <td>Will leave both double and single quotes unconverted.</td>
2013
   *                         </tr>
2014
   *                         <tr valign="top">
2015
   *                         <td><b>ENT_HTML401</b></td>
2016
   *                         <td>
2017
   *                         Handle code as HTML 4.01.
2018
   *                         </td>
2019
   *                         </tr>
2020
   *                         <tr valign="top">
2021
   *                         <td><b>ENT_XML1</b></td>
2022
   *                         <td>
2023
   *                         Handle code as XML 1.
2024
   *                         </td>
2025
   *                         </tr>
2026
   *                         <tr valign="top">
2027
   *                         <td><b>ENT_XHTML</b></td>
2028
   *                         <td>
2029
   *                         Handle code as XHTML.
2030
   *                         </td>
2031
   *                         </tr>
2032
   *                         <tr valign="top">
2033
   *                         <td><b>ENT_HTML5</b></td>
2034
   *                         <td>
2035
   *                         Handle code as HTML 5.
2036
   *                         </td>
2037
   *                         </tr>
2038
   *                         </table>
2039
   *                         </p>
2040
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
2041
   *
2042
   * @return string <p>The decoded string.</p>
2043
   */
2044 22
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Fewer than 4 bytes: returned as-is without any decoding.
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Fast exit: no "&" at all, or neither a numeric entity start ("&#") nor a ";".
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // Make sure the support table is initialised before it is read,
      // like the other methods of this class do.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Decode repeatedly until a pass no longer changes the string
    // (handles multiple-encoded entities).
    do {
      $str_compare = $str;

      // Decimal numeric entities: convert them, but leave quote characters encoded
      // so that the $flags passed to html_entity_decode() below decide about them.
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the regex appends the missing ";" to numeric entities first; the result is
      // cast because preg_replace() returns null on error, which html_entity_decode()
      // does not accept under strict_types)
      $str = \html_entity_decode(
          (string)\preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2114
2115
  /**
   * Create a escape html version of the string via "UTF8::htmlspecialchars()".
   *
   * The flags "ENT_QUOTES | ENT_SUBSTITUTE" are always used.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2131
2132
  /**
   * Remove empty html-tag.
   *
   * e.g.: <tag></tag>
   *
   * A tag pair counts as empty if there is nothing or only whitespace between
   * the opening and the closing tag.
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string)$stripped;
  }
2149
2150
  /**
2151
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2152
   *
2153
   * @link http://php.net/manual/en/function.htmlentities.php
2154
   *
2155
   * @param string $str           <p>
2156
   *                              The input string.
2157
   *                              </p>
2158
   * @param int    $flags         [optional] <p>
2159
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2160
   *                              invalid code unit sequences and the used document type. The default is
2161
   *                              ENT_COMPAT | ENT_HTML401.
2162
   *                              <table>
2163
   *                              Available <i>flags</i> constants
2164
   *                              <tr valign="top">
2165
   *                              <td>Constant Name</td>
2166
   *                              <td>Description</td>
2167
   *                              </tr>
2168
   *                              <tr valign="top">
2169
   *                              <td><b>ENT_COMPAT</b></td>
2170
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2171
   *                              </tr>
2172
   *                              <tr valign="top">
2173
   *                              <td><b>ENT_QUOTES</b></td>
2174
   *                              <td>Will convert both double and single quotes.</td>
2175
   *                              </tr>
2176
   *                              <tr valign="top">
2177
   *                              <td><b>ENT_NOQUOTES</b></td>
2178
   *                              <td>Will leave both double and single quotes unconverted.</td>
2179
   *                              </tr>
2180
   *                              <tr valign="top">
2181
   *                              <td><b>ENT_IGNORE</b></td>
2182
   *                              <td>
2183
   *                              Silently discard invalid code unit sequences instead of returning
2184
   *                              an empty string. Using this flag is discouraged as it
2185
   *                              may have security implications.
2186
   *                              </td>
2187
   *                              </tr>
2188
   *                              <tr valign="top">
2189
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2190
   *                              <td>
2191
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2192
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2193
   *                              </td>
2194
   *                              </tr>
2195
   *                              <tr valign="top">
2196
   *                              <td><b>ENT_DISALLOWED</b></td>
2197
   *                              <td>
2198
   *                              Replace invalid code points for the given document type with a
2199
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2200
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2201
   *                              instance, to ensure the well-formedness of XML documents with
2202
   *                              embedded external content.
2203
   *                              </td>
2204
   *                              </tr>
2205
   *                              <tr valign="top">
2206
   *                              <td><b>ENT_HTML401</b></td>
2207
   *                              <td>
2208
   *                              Handle code as HTML 4.01.
2209
   *                              </td>
2210
   *                              </tr>
2211
   *                              <tr valign="top">
2212
   *                              <td><b>ENT_XML1</b></td>
2213
   *                              <td>
2214
   *                              Handle code as XML 1.
2215
   *                              </td>
2216
   *                              </tr>
2217
   *                              <tr valign="top">
2218
   *                              <td><b>ENT_XHTML</b></td>
2219
   *                              <td>
2220
   *                              Handle code as XHTML.
2221
   *                              </td>
2222
   *                              </tr>
2223
   *                              <tr valign="top">
2224
   *                              <td><b>ENT_HTML5</b></td>
2225
   *                              <td>
2226
   *                              Handle code as HTML 5.
2227
   *                              </td>
2228
   *                              </tr>
2229
   *                              </table>
2230
   *                              </p>
2231
   * @param string $encoding      [optional] <p>
2232
   *                              Like <b>htmlspecialchars</b>,
2233
   *                              <b>htmlentities</b> takes an optional third argument
2234
   *                              <i>encoding</i> which defines encoding used in
2235
   *                              conversion.
2236
   *                              Although this argument is technically optional, you are highly
2237
   *                              encouraged to specify the correct value for your code.
2238
   *                              </p>
2239
   * @param bool   $double_encode [optional] <p>
2240
   *                              When <i>double_encode</i> is turned off PHP will not
2241
   *                              encode existing html entities. The default is to convert everything.
2242
   *                              </p>
2243
   *
2244
   *
2245
   * @return string the encoded string.
2246
   * </p>
2247
   * <p>
2248
   * If the input <i>string</i> contains an invalid code unit
2249
   * sequence within the given <i>encoding</i> an empty string
2250
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2251
   * <b>ENT_SUBSTITUTE</b> flags are set.
2252
   */
2253 7
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP's htmlentities() does not convert a backslash into its html entity, because
     * backslashes are mostly used to escape characters when inserting into a database.
     * That is not needed here, so every backslash is replaced by its html entity equivalent.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // Finally encode whatever is left as numbered entities, keeping the ASCII chars.
    return self::html_encode($encoded, true, $encoding);
  }
2273
2274
  /**
2275
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2276
   *
2277
   * INFO: Take a look at "UTF8::htmlentities()"
2278
   *
2279
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2280
   *
2281
   * @param string $str           <p>
2282
   *                              The string being converted.
2283
   *                              </p>
2284
   * @param int    $flags         [optional] <p>
2285
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2286
   *                              invalid code unit sequences and the used document type. The default is
2287
   *                              ENT_COMPAT | ENT_HTML401.
2288
   *                              <table>
2289
   *                              Available <i>flags</i> constants
2290
   *                              <tr valign="top">
2291
   *                              <td>Constant Name</td>
2292
   *                              <td>Description</td>
2293
   *                              </tr>
2294
   *                              <tr valign="top">
2295
   *                              <td><b>ENT_COMPAT</b></td>
2296
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2297
   *                              </tr>
2298
   *                              <tr valign="top">
2299
   *                              <td><b>ENT_QUOTES</b></td>
2300
   *                              <td>Will convert both double and single quotes.</td>
2301
   *                              </tr>
2302
   *                              <tr valign="top">
2303
   *                              <td><b>ENT_NOQUOTES</b></td>
2304
   *                              <td>Will leave both double and single quotes unconverted.</td>
2305
   *                              </tr>
2306
   *                              <tr valign="top">
2307
   *                              <td><b>ENT_IGNORE</b></td>
2308
   *                              <td>
2309
   *                              Silently discard invalid code unit sequences instead of returning
2310
   *                              an empty string. Using this flag is discouraged as it
2311
   *                              may have security implications.
2312
   *                              </td>
2313
   *                              </tr>
2314
   *                              <tr valign="top">
2315
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2316
   *                              <td>
2317
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2318
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2319
   *                              </td>
2320
   *                              </tr>
2321
   *                              <tr valign="top">
2322
   *                              <td><b>ENT_DISALLOWED</b></td>
2323
   *                              <td>
2324
   *                              Replace invalid code points for the given document type with a
2325
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2326
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2327
   *                              instance, to ensure the well-formedness of XML documents with
2328
   *                              embedded external content.
2329
   *                              </td>
2330
   *                              </tr>
2331
   *                              <tr valign="top">
2332
   *                              <td><b>ENT_HTML401</b></td>
2333
   *                              <td>
2334
   *                              Handle code as HTML 4.01.
2335
   *                              </td>
2336
   *                              </tr>
2337
   *                              <tr valign="top">
2338
   *                              <td><b>ENT_XML1</b></td>
2339
   *                              <td>
2340
   *                              Handle code as XML 1.
2341
   *                              </td>
2342
   *                              </tr>
2343
   *                              <tr valign="top">
2344
   *                              <td><b>ENT_XHTML</b></td>
2345
   *                              <td>
2346
   *                              Handle code as XHTML.
2347
   *                              </td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_HTML5</b></td>
2351
   *                              <td>
2352
   *                              Handle code as HTML 5.
2353
   *                              </td>
2354
   *                              </tr>
2355
   *                              </table>
2356
   *                              </p>
2357
   * @param string $encoding      [optional] <p>
2358
   *                              Defines encoding used in conversion.
2359
   *                              </p>
2360
   *                              <p>
2361
   *                              For the purposes of this function, the encodings
2362
   *                              ISO-8859-1, ISO-8859-15,
2363
   *                              UTF-8, cp866,
2364
   *                              cp1251, cp1252, and
2365
   *                              KOI8-R are effectively equivalent, provided the
2366
   *                              <i>string</i> itself is valid for the encoding, as
2367
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2368
   *                              the same positions in all of these encodings.
2369
   *                              </p>
2370
   * @param bool   $double_encode [optional] <p>
2371
   *                              When <i>double_encode</i> is turned off PHP will not
2372
   *                              encode existing html entities, the default is to convert everything.
2373
   *                              </p>
2374
   *
2375
   * @return string The converted string.
2376
   * </p>
2377
   * <p>
2378
   * If the input <i>string</i> contains an invalid code unit
2379
   * sequence within the given <i>encoding</i> an empty string
2380
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2381
   * <b>ENT_SUBSTITUTE</b> flags are set.
2382
   */
2383 7
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are handed to PHP untouched; every other label is
    // normalized first (with "UTF-8" passed as the second argument).
    $usableAsIs = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$usableAsIs) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2391
2392
  /**
2393
   * Checks whether iconv is available on the server.
2394
   *
2395
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2396
   */
2397
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so it can be returned as-is.
    return \extension_loaded('iconv');
  }
2401
2402
  /**
2403
   * alias for "UTF8::decimal_to_chr()"
2404
   *
2405
   * @see UTF8::decimal_to_chr()
2406
   *
2407
   * @param mixed $int
2408
   *
2409
   * @return string
2410
   */
2411 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the value is forwarded to decimal_to_chr() untouched.
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2415
2416
  /**
2417
   * Converts Integer to hexadecimal U+xxxx code point representation.
2418
   *
2419
   * INFO: opposite to UTF8::hex_to_int()
2420
   *
2421
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2422
   * @param string $pfix [optional]
2423
   *
2424
   * @return string <p>The code point, or empty string on failure.</p>
2425
   */
2426 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad the lowercase hex digits with zeros up to four characters;
    // longer values are kept at their full length.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2434
2435
  /**
2436
   * Checks whether intl-char is available on the server.
2437
   *
2438
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2439
   */
2440
  public static function intlChar_loaded(): bool
  {
    // Availability is decided solely by the existence of the "IntlChar" class.
    $available = \class_exists('IntlChar');

    return $available;
  }
2444
2445
  /**
2446
   * Checks whether intl is available on the server.
2447
   *
2448
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2449
   */
2450 3
  public static function intl_loaded(): bool
  {
    // Availability is decided solely by the "intl" extension being loaded.
    $available = \extension_loaded('intl');

    return $available;
  }
2454
2455
  /**
2456
   * alias for "UTF8::is_ascii()"
2457
   *
2458
   * @see        UTF8::is_ascii()
2459
   *
2460
   * @param string $str
2461
   *
2462
   * @return bool
2463
   *
2464
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2465
   */
2466 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias; all work happens in is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2470
2471
  /**
2472
   * alias for "UTF8::is_base64()"
2473
   *
2474
   * @see        UTF8::is_base64()
2475
   *
2476
   * @param string $str
2477
   *
2478
   * @return bool
2479
   *
2480
   * @deprecated <p>use "UTF8::is_base64()"</p>
2481
   */
2482 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias; all work happens in is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2486
2487
  /**
2488
   * alias for "UTF8::is_binary()"
2489
   *
2490
   * @see        UTF8::is_binary()
2491
   *
2492
   * @param mixed $str
2493
   * @param bool  $strict
2494
   *
2495
   * @return bool
2496
   *
2497
   * @deprecated <p>use "UTF8::is_binary()"</p>
2498
   */
2499 2
  public static function isBinary($str, bool $strict = false): bool
  {
    // Deprecated camelCase alias for is_binary().
    //
    // $strict is declared as bool here because is_binary() declares it as
    // bool and this file runs with strict_types: an untyped pass-through
    // would surface a non-bool argument as a TypeError raised from inside
    // this class instead of being checked at the caller's boundary.
    return self::is_binary($str, $strict);
  }
2503
2504
  /**
2505
   * alias for "UTF8::is_bom()"
2506
   *
2507
   * @see        UTF8::is_bom()
2508
   *
2509
   * @param string $utf8_chr
2510
   *
2511
   * @return bool
2512
   *
2513
   * @deprecated <p>use "UTF8::is_bom()"</p>
2514
   */
2515 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias; all work happens in is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2519
2520
  /**
2521
   * alias for "UTF8::is_html()"
2522
   *
2523
   * @see        UTF8::is_html()
2524
   *
2525
   * @param string $str
2526
   *
2527
   * @return bool
2528
   *
2529
   * @deprecated <p>use "UTF8::is_html()"</p>
2530
   */
2531 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias; all work happens in is_html().
    $result = self::is_html($str);

    return $result;
  }
2535
2536
  /**
2537
   * alias for "UTF8::is_json()"
2538
   *
2539
   * @see        UTF8::is_json()
2540
   *
2541
   * @param string $str
2542
   *
2543
   * @return bool
2544
   *
2545
   * @deprecated <p>use "UTF8::is_json()"</p>
2546
   */
2547
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias; all work happens in is_json().
    $result = self::is_json($str);

    return $result;
  }
2551
2552
  /**
2553
   * alias for "UTF8::is_utf16()"
2554
   *
2555
   * @see        UTF8::is_utf16()
2556
   *
2557
   * @param string $str
2558
   *
2559
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2560
   *
2561
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2562
   */
2563 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias; the result of is_utf16() is returned unchanged.
    $result = self::is_utf16($str);

    return $result;
  }
2567
2568
  /**
2569
   * alias for "UTF8::is_utf32()"
2570
   *
2571
   * @see        UTF8::is_utf32()
2572
   *
2573
   * @param string $str
2574
   *
2575
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2576
   *
2577
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2578
   */
2579 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias; the result of is_utf32() is returned unchanged.
    $result = self::is_utf32($str);

    return $result;
  }
2583
2584
  /**
2585
   * alias for "UTF8::is_utf8()"
2586
   *
2587
   * @see        UTF8::is_utf8()
2588
   *
2589
   * @param string $str
2590
   * @param bool   $strict
2591
   *
2592
   * @return bool
2593
   *
2594
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2595
   */
2596 16
  public static function isUtf8($str, bool $strict = false): bool
  {
    // Deprecated camelCase alias for is_utf8().
    //
    // $strict is declared as bool here because is_utf8() declares it as bool
    // and this file runs with strict_types: an untyped pass-through would
    // surface a non-bool argument as a TypeError raised from inside this
    // class instead of being checked at the caller's boundary.
    return self::is_utf8($str, $strict);
  }
2600
2601
  /**
2602
   * Returns true if the string contains only alphabetic chars, false otherwise.
2603
   *
2604
   * @param string $str
2605
   *
2606
   * @return bool <p>Whether or not $str contains only alphabetic chars.</p>
2607
   */
2608 10
  public static function is_alpha(string $str): bool
  {
    // Whole-string match against the POSIX "alpha" class; "*" lets the
    // pattern itself accept an empty string.
    $pattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2612
2613
  /**
2614
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2615
   *
2616
   * @param string $str
2617
   *
2618
   * @return bool <p>Whether or not $str contains only alphanumeric chars.</p>
2619
   */
2620 13
  public static function is_alphanumeric(string $str): bool
  {
    // Whole-string match against the POSIX "alnum" class; "*" lets the
    // pattern itself accept an empty string.
    $pattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2624
2625
  /**
2626
   * Checks if a string is 7 bit ASCII.
2627
   *
2628
   * @param string $str <p>The string to check.</p>
2629
   *
2630
   * @return bool <p>
2631
   *              <strong>true</strong> if it is ASCII<br>
2632
   *              <strong>false</strong> otherwise
2633
   *              </p>
2634
   */
2635 100
  public static function is_ascii(string $str): bool
  {
    // Nothing to inspect: an empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Search for a single byte outside the accepted set: TAB, LF, CR, the
    // printable range 0x20-0x7E, plus 0x10 and 0x13.
    // NOTE(review): 0x10 / 0x13 are accepted while other control bytes such as
    // 0x00 are not - this may have been meant as decimal 10 / 13 (already
    // covered by \x0A / \x0D); confirm before changing the class.
    $foundForbiddenByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundForbiddenByte;
  }
2643
2644
  /**
2645
   * Returns true if the string is base64 encoded, false otherwise.
2646
   *
2647
   * @param string $str <p>The input string.</p>
2648
   *
2649
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2650
   */
2651 8
  public static function is_base64(string $str): bool
  {
    // An empty string is never reported as base64 (unchanged behaviour).
    if ($str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false when $str contains a
    // character outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare against false explicitly. A truthiness test would reject valid
    // input whose decoded payload is the string "0" (e.g. "MA==").
    // Re-encoding must reproduce the input exactly, which rules out
    // non-canonical spellings (missing padding, embedded whitespace, ...).
    return $decoded !== false && \base64_encode($decoded) === $str;
  }
2657
2658
  /**
2659
   * Check if the input is binary... (this looks like a hack).
2660
   *
2661
   * @param mixed $input
2662
   * @param bool  $strict
2663
   *
2664
   * @return bool
2665
   */
2666 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // A string made only of the characters "0" and "1" is reported as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // More than 30% NUL bytes: reported as binary regardless of $strict.
    // ($input is non-empty here, so the division is safe.)
    $nullByteCount = \substr_count($input, "\x0");
    if (($nullByteCount / \strlen($input)) > 0.3) {
      return true;
    }

    // Non-strict mode: any NUL byte at all is enough.
    if ($strict !== true) {
      return $nullByteCount > 0;
    }

    // Strict mode: let fileinfo decide.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(FILEINFO_MIME_ENCODING);

    return $detector->buffer($input) === 'binary';
  }
2711
2712
  /**
2713
   * Check if the file is binary.
2714
   *
2715
   * @param string $file
2716
   *
2717
   * @return bool
2718
   */
2719 3
  public static function is_binary_file($file): bool
  {
    // init
    $block = '';

    try {
      $fp = \fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing. Handing
      // that false to fread() / fclose() raises a TypeError, which the
      // \Exception handler below would not catch - so check the handle first.
      if (\is_resource($fp)) {
        try {
          // Only the first 512 bytes are inspected.
          $block = \fread($fp, 512);
        } finally {
          // Release the handle even if reading fails.
          \fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // Best effort, as before: any \Exception raised while opening or
      // reading is treated as "nothing could be read".
      $block = '';
    }

    // Nothing read (missing / unreadable / empty file, or fread() returned
    // false): not binary. is_binary() answers the same for an empty string.
    if ($block === false || $block === '') {
      return false;
    }

    return self::is_binary($block, true);
  }
2731
2732
  /**
2733
   * Returns true if the string contains only whitespace chars, false otherwise.
2734
   *
2735
   * @param string $str
2736
   *
2737
   * @return bool <p>Whether or not $str contains only whitespace characters.</p>
2738
   */
2739 15
  public static function is_blank(string $str): bool
  {
    // Whole-string match against the POSIX "space" class; "*" lets the
    // pattern itself accept an empty string.
    $pattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2743
2744
  /**
2745
   * Checks if the given string is equal to any "Byte Order Mark".
2746
   *
2747
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2748
   *
2749
   * @param string $str <p>The input string.</p>
2750
   *
2751
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2752
   */
2753 1
  public static function is_bom($str): bool
  {
    // The known BOMs are the *keys* of self::$BOM. The input has to be
    // identical (strict comparison) to one of them; a string that merely
    // starts with a BOM does not qualify.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2763
2764
  /**
2765
   * Determine whether the string is considered to be empty.
2766
   *
2767
   * A variable is considered empty if it does not exist or if its value equals FALSE.
2768
   * empty() does not generate a warning if the variable does not exist.
2769
   *
2770
   * @param mixed $str
2771
   *
2772
   * @return bool <p>Whether or not $str is empty().</p>
2773
   */
2774
  public static function is_empty($str): bool
  {
    // The parameter always exists, so empty() reduces to a falsiness check:
    // "", "0", 0, 0.0, null, false and [] all count as empty.
    return !$str;
  }
2778
2779
  /**
2780
   * Returns true if the string contains only hexadecimal chars, false otherwise.
2781
   *
2782
   * @param string $str
2783
   *
2784
   * @return bool <p>Whether or not $str contains only hexadecimal chars.</p>
2785
   */
2786 13
  public static function is_hexadecimal(string $str): bool
  {
    // Whole-string match against the POSIX "xdigit" class; "*" lets the
    // pattern itself accept an empty string.
    $pattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2790
2791
  /**
2792
   * Check if the string contains any HTML tags, e.g. <lall>.
2793
   *
2794
   * @param string $str <p>The input string.</p>
2795
   *
2796
   * @return bool
2797
   */
2798 2
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // One opening, closing or self-closing tag (with optional attributes)
    // anywhere in the string is enough.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // preg_match() yields 1 only when a tag was found; 0 (no match) and
    // false (PCRE error) both mean "no tag".
    return \preg_match($tagPattern, $str) === 1;
  }
2811
2812
  /**
2813
   * Try to check if "$str" is a JSON string.
2814
   *
2815
   * @param string $str <p>The input string.</p>
2816
   *
2817
   * @return bool
2818
   */
2819 21
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only containers qualify: a decoded scalar or null is not accepted.
    $isContainer = \is_object($decoded) === true || \is_array($decoded) === true;
    if (!$isContainer) {
      return false;
    }

    // ...and decoding must not have left an error behind.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
2844
2845
  /**
2846
   * @param string $str
2847
   *
2848
   * @return bool
2849
   */
2850 8
  public static function is_lowercase(string $str): bool
  {
    // Whole-string match against the POSIX "lower" class; "*" lets the
    // pattern itself accept an empty string.
    $pattern = '^[[:lower:]]*$';

    return (bool)self::str_matches_pattern($str, $pattern);
  }
2858
2859
  /**
2860
   * Returns true if the string is serialized, false otherwise.
2861
   *
2862
   * @param string $str
2863
   *
2864
   * @return bool <p>Whether or not $str is serialized.</p>
2865
   */
2866 7
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, so that one payload is recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // This is only a format probe, so no class may be instantiated while
    // testing the string: with "allowed_classes => false" objects come back
    // as __PHP_Incomplete_Class and no __wakeup() / __destruct() code of a
    // real class runs for attacker-controlled input. The boolean answer is
    // the same as without the option.
    // "@" silences the notice unserialize() emits for non-serialized input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2878
2879
  /**
2880
   * Returns true if the string contains only upper case chars, false
2881
   * otherwise.
2882
   *
2883
   * @param string $str <p>The input string.</p>
2884
   *
2885
   * @return bool <p>Whether or not $str contains only upper case characters.</p>
2886
   */
2887 8
  public static function is_uppercase(string $str): bool
  {
    // Whole-string match against the POSIX "upper" class; "*" lets the
    // pattern itself accept an empty string.
    $pattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2891
2892
  /**
2893
   * Check if the string is UTF-16.
2894
   *
2895
   * @param string $str <p>The input string.</p>
2896
   *
2897
   * @return int|false <p>
2898
   *                   <strong>false</strong> if it is not UTF-16,<br>
2899
   *                   <strong>1</strong> for UTF-16LE,<br>
2900
   *                   <strong>2</strong> for UTF-16BE.
2901
   *                   </p>
2902
   */
2903 10
  public static function is_utf16(string $str)
  {
    // Only input that is_binary() flags is considered at all.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Filled with count_chars($str) the first time a round-trip succeeds and
    // then shared between both byte orders.
    $inputChars = [];

    // Scores one byte order: interpret $str in $encoding, convert it to UTF-8
    // and back again, and - if that round-trip is stable - count how many keys
    // of count_chars(<UTF-8 result>) are found in $inputChars.
    // NOTE(review): in_array() searches the *values* of $inputChars for a *key*
    // of the other count_chars() result - confirm this is intended.
    $scoreFor = static function (string $encoding) use ($str, &$inputChars): int {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return 0;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTripped !== $asUtf8) {
        return 0;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      $hits = 0;
      foreach (self::count_chars($roundTripped, true) as $char => $unused) {
        if (\in_array($char, $inputChars, true) === true) {
          $hits++;
        }
      }

      return $hits;
    };

    $littleEndianScore = $scoreFor('UTF-16LE');
    $bigEndianScore = $scoreFor('UTF-16BE');

    // A tie (including 0:0) means "not UTF-16"; otherwise the higher score
    // wins: 1 = UTF-16LE, 2 = UTF-16BE.
    if ($littleEndianScore === $bigEndianScore) {
      return false;
    }

    return $littleEndianScore > $bigEndianScore ? 1 : 2;
  }
2958
2959
  /**
2960
   * Check if the string is UTF-32.
2961
   *
2962
   * @param string $str
2963
   *
2964
   * @return int|false <p>
2965
   *                   <strong>false</strong> if it is not UTF-32,<br>
2966
   *                   <strong>1</strong> for UTF-32LE,<br>
2967
   *                   <strong>2</strong> for UTF-32BE.
2968
   *                   </p>
2969
   */
2970 8
  public static function is_utf32(string $str)
  {
    // Only input that is_binary() flags is considered at all.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Filled with count_chars($str) the first time a round-trip succeeds and
    // then shared between both byte orders.
    $inputChars = [];

    // Scores one byte order: interpret $str in $encoding, convert it to UTF-8
    // and back again, and - if that round-trip is stable - count how many keys
    // of count_chars(<UTF-8 result>) are found in $inputChars.
    // NOTE(review): in_array() searches the *values* of $inputChars for a *key*
    // of the other count_chars() result - confirm this is intended.
    $scoreFor = static function (string $encoding) use ($str, &$inputChars): int {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        return 0;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTripped !== $asUtf8) {
        return 0;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      $hits = 0;
      foreach (self::count_chars($roundTripped, true) as $char => $unused) {
        if (\in_array($char, $inputChars, true) === true) {
          $hits++;
        }
      }

      return $hits;
    };

    $littleEndianScore = $scoreFor('UTF-32LE');
    $bigEndianScore = $scoreFor('UTF-32BE');

    // A tie (including 0:0) means "not UTF-32"; otherwise the higher score
    // wins: 1 = UTF-32LE, 2 = UTF-32BE.
    if ($littleEndianScore === $bigEndianScore) {
      return false;
    }

    return $littleEndianScore > $bigEndianScore ? 1 : 2;
  }
3025
3026
  /**
3027
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3028
   *
3029
   * @see    http://hsivonen.iki.fi/php-utf8/
3030
   *
3031
   * @param string|string[] $str    <p>The string to be checked.</p>
3032
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3033
   *
3034
   * @return bool
3035
   */
3036 92
  public static function is_utf8($str, bool $strict = false): bool
3037
  {
3038 92
    if (\is_array($str) === true) {
3039 1
      foreach ($str as $k => $v) {
3040 1
        if (false === self::is_utf8($v, $strict)) {
3041 1
          return false;
3042
        }
3043
      }
3044
3045
      return true;
3046
    }
3047
3048 92
    if ('' === $str) {
3049 11
      return true;
3050
    }
3051
3052 88
    if ($strict === true) {
3053 1
      if (self::is_utf16($str) !== false) {
3054 1
        return false;
3055
      }
3056
3057
      if (self::is_utf32($str) !== false) {
3058
        return false;
3059
      }
3060
    }
3061
3062 88
    if (self::pcre_utf8_support() !== true) {
3063
3064
      // If even just the first character can be matched, when the /u
3065
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3066
      // invalid, nothing at all will match, even if the string contains
3067
      // some valid sequences
3068
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3069
    }
3070
3071 88
    $mState = 0; // cached expected number of octets after the current octet
3072
    // until the beginning of the next UTF8 character sequence
3073 88
    $mUcs4 = 0; // cached Unicode character
3074 88
    $mBytes = 1; // cached expected number of octets in the current sequence
3075
3076 88
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3077
      self::checkForSupport();
3078
    }
3079
3080 88
    if (self::$ORD === null) {
3081
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type integer or string or boolean. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account_id instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3082
    }
3083
3084 88
    $len = self::strlen_in_byte((string)$str);
3085
    /** @noinspection ForeachInvariantsInspection */
3086 88
    for ($i = 0; $i < $len; $i++) {
3087 88
      $in = self::$ORD[$str[$i]];
3088 88
      if ($mState === 0) {
3089
        // When mState is zero we expect either a US-ASCII character or a
3090
        // multi-octet sequence.
3091 88
        if (0 === (0x80 & $in)) {
3092
          // US-ASCII, pass straight through.
3093 85
          $mBytes = 1;
3094 69
        } elseif (0xC0 === (0xE0 & $in)) {
3095
          // First octet of 2 octet sequence.
3096 62
          $mUcs4 = $in;
3097 62
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3098 62
          $mState = 1;
3099 62
          $mBytes = 2;
3100 46
        } elseif (0xE0 === (0xF0 & $in)) {
3101
          // First octet of 3 octet sequence.
3102 30
          $mUcs4 = $in;
3103 30
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3104 30
          $mState = 2;
3105 30
          $mBytes = 3;
3106 23
        } elseif (0xF0 === (0xF8 & $in)) {
3107
          // First octet of 4 octet sequence.
3108 13
          $mUcs4 = $in;
3109 13
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3110 13
          $mState = 3;
3111 13
          $mBytes = 4;
3112 11
        } elseif (0xF8 === (0xFC & $in)) {
3113
          /* First octet of 5 octet sequence.
3114
          *
3115
          * This is illegal because the encoded codepoint must be either
3116
          * (a) not the shortest form or
3117
          * (b) outside the Unicode range of 0-0x10FFFF.
3118
          * Rather than trying to resynchronize, we will carry on until the end
3119
          * of the sequence and let the later error handling code catch it.
3120
          */
3121 4
          $mUcs4 = $in;
3122 4
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3123 4
          $mState = 4;
3124 4
          $mBytes = 5;
3125 8
        } elseif (0xFC === (0xFE & $in)) {
3126
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3127 4
          $mUcs4 = $in;
3128 4
          $mUcs4 = ($mUcs4 & 1) << 30;
3129 4
          $mState = 5;
3130 4
          $mBytes = 6;
3131
        } else {
3132
          /* Current octet is neither in the US-ASCII range nor a legal first
3133
           * octet of a multi-octet sequence.
3134
           */
3135 88
          return false;
3136
        }
3137
      } else {
3138
        // When mState is non-zero, we expect a continuation of the multi-octet
3139
        // sequence
3140 69
        if (0x80 === (0xC0 & $in)) {
3141
          // Legal continuation.
3142 63
          $shift = ($mState - 1) * 6;
3143 63
          $tmp = $in;
3144 63
          $tmp = ($tmp & 0x0000003F) << $shift;
3145 63
          $mUcs4 |= $tmp;
3146
          /**Prefix
3147
           * End of the multi-octet sequence. mUcs4 now contains the final
3148
           * Unicode code point to be output
3149
           */
3150 63
          if (0 === --$mState) {
3151
            /*
3152
            * Check for illegal sequences and code points.
3153
            */
3154
            // From Unicode 3.1, non-shortest form is illegal
3155
            if (
3156 63
                (2 === $mBytes && $mUcs4 < 0x0080) ||
3157 63
                (3 === $mBytes && $mUcs4 < 0x0800) ||
3158 63
                (4 === $mBytes && $mUcs4 < 0x10000) ||
3159 63
                (4 < $mBytes) ||
3160
                // From Unicode 3.2, surrogate characters are illegal.
3161 63
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
3162
                // Code points outside the Unicode range are illegal.
3163 63
                ($mUcs4 > 0x10FFFF)
3164
            ) {
3165 7
              return false;
3166
            }
3167
            // initialize UTF8 cache
3168 63
            $mState = 0;
3169 63
            $mUcs4 = 0;
3170 63
            $mBytes = 1;
3171
          }
3172
        } else {
3173
          /**
3174
           *((0xC0 & (*in) != 0x80) && (mState != 0))
3175
           * Incomplete multi-octet sequence.
3176
           */
3177 28
          return false;
3178
        }
3179
      }
3180
    }
3181
3182 56
    return true;
3183
  }
3184
3185
  /**
   * Decodes a JSON string.
   *
   * Wrapper around PHP's native json_decode(): the input is first passed
   * through self::filter(), then handed to the native function and the
   * decoded value is returned as-is.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded. This function only works with
   *                        UTF-8 encoded strings.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, forwarded unchanged to the native function.
   *                        </p>
   *
   * @throws \RuntimeException <p>If the "json" support flag is false.</p>
   *
   * @return mixed <p>
   *               The value encoded in <i>json</i> in the appropriate PHP type. <b>NULL</b> is returned
   *               if the <i>json</i> cannot be decoded or if the encoded data is deeper than the
   *               recursion limit.
   *               </p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // Make sure the feature-detection table has been populated before reading it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3238
3239
  /**
   * Returns the JSON representation of a value.
   *
   * Wrapper around PHP's native json_encode(): the value is first passed
   * through self::filter() and then encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode constants (e.g. <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to the native function.
   *                       </p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @throws \RuntimeException <p>If the "json" support flag is false, or if the value cannot be encoded.</p>
   *
   * @return string <p>A JSON encoded string.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512): string
  {
    $value = self::filter($value);

    // Make sure the feature-detection table has been populated before reading it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $json = \json_encode($value, $options, $depth);

    // The native function returns false on failure; with the declared "string"
    // return type that would surface as an opaque TypeError, so report the real
    // encoding error instead.
    if ($json === false) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      throw new \RuntimeException('json_encode failed: ' . \json_last_error_msg());
    }

    return $json;
  }
3292
3293
  /**
   * Checks whether JSON is available on the server.
   *
   * Availability is detected by testing for the native "json_decode" function.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3302
3303
  /**
   * Makes string's first char lowercase.
   *
   * The string is split into its first character and the remainder; only the
   * first character is lower-cased, the remainder is appended untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; substr() may report "false" when there is none.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    $loweredHead = self::strtolower($head, $encoding, $cleanUtf8);

    return $loweredHead . ($tail === false ? '' : $tail);
  }
3327
3328
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3343
3344
  /**
   * Lowercase the first character of all words in the string.
   *
   * The string is split via self::str_to_words(); every non-empty piece that is
   * not listed in $exceptions gets its first character lower-cased via
   * self::lcfirst(), then the pieces are glued back together.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose truthiness check would also
    // discard the perfectly valid input "0".
    if ('' === $str) {
      return '';
    }

    $useExceptions = $exceptions !== [];
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip only genuinely empty pieces; a word consisting of "0" must survive.
      if ('' === (string)$word) {
        continue;
      }

      if (!$useExceptions || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3393
3394
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3409
3410
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without explicit $chars, leading characters of the Unicode categories
   * "separator" (\pZ) and "other" (\pC) are removed. With $chars, exactly the
   * given characters are removed from the left.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Only real "nothing given" markers select the default character class.
    // A loose check would also swallow "0" / 0, so ltrim('007', '0') would
    // strip whitespace instead of the zeros.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === [];

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = '^[\pZ\pC]+';
    } else {
      // Cast first: preg_quote() only accepts strings under strict_types.
      $quotedChars = \preg_quote((string)$chars, '/');
      $pattern = "^[$quotedChars]+";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
3434
3435
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if the input
   *                contains no characters.</p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty list (warning + false on PHP 7,
    // ValueError on PHP 8), so empty input yields an empty result.
    if (!\is_array($codepoints) || \count($codepoints) === 0) {
      return '';
    }

    // self::chr() may yield null; the cast keeps the declared string return type.
    return (string)self::chr(\max($codepoints));
  }
3450
3451
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, or 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizeList = self::chr_size_list($str);

    // No characters at all: nothing to measure.
    if (\count($sizeList) === 0) {
      return 0;
    }

    return (int)\max($sizeList);
  }
3468
3469
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, its internal encoding is
   * switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3484
3485
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * Reads the INI directive "mbstring.func_overload" and tests it against the
   * MB_OVERLOAD_STRING bit; the directive is deprecated since PHP 7.2.
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    // Without the constant there is nothing to test against.
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3502
3503
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string <p>The character with the lowest code point, or an empty string if the input
   *                contains no characters.</p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty list (warning + false on PHP 7,
    // ValueError on PHP 8), so empty input yields an empty result.
    if (!\is_array($codepoints) || \count($codepoints) === 0) {
      return '';
    }

    // self::chr() may yield null; the cast keeps the declared string return type.
    return (string)self::chr(\min($codepoints));
  }
3518
3519
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3535
3536
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order:
   * 1. an empty name yields $fallback,
   * 2. "UTF-8" / "UTF8" yield "UTF-8",
   * 3. a previously resolved name is served from a per-request static cache,
   * 4. a name contained in the known-encodings list is returned as-is,
   * 5. otherwise the name is upper-cased, reduced to letters and digits and
   *    looked up in an alias table; without a match the upper-cased name is returned.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    if ('UTF-8' === $encoding || 'UTF8' === $encoding) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      // getData() is not guaranteed to hand back an array; never store (or later
      // search) a non-array value in the null|array property.
      $encodingList = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodingList) ? $encodingList : [];
    }

    if (\is_array(self::$ENCODINGS) && \in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Lookup key: letters and digits only. Whitespace is stripped as well, since
    // no alias key contains any (so e.g. "ISO 8859 1" resolves like "ISO-8859-1").
    // The cast guards against preg_replace() returning null on failure.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (isset($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the caller's original spelling so the next lookup hits directly.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3653
3654
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table found in the string is replaced
   * by the corresponding value. The key and value lists are built once and
   * kept in static caches for later calls.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        // getData() is not guaranteed to hand back an array; never store a
        // non-array value in the null|array property.
        $mswordMap = self::getData('utf8_msword');
        self::$UTF8_MSWORD = \is_array($mswordMap) ? $mswordMap : [];
      }

      // array_keys() / array_values() only accept arrays; fall back to an empty
      // map (= no replacements) if the property holds anything else.
      $replacementMap = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($replacementMap);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($replacementMap);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3682
3683
  /**
   * Normalize the whitespace.
   *
   * All entries of the whitespace table are replaced by a plain space. Unless
   * requested otherwise, the entries of the bidirectional-control table are
   * removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ($str === '') {
      return '';
    }

    static $WHITESPACE_LISTS = [];
    static $BIDI_LIST = null;

    // one memoized search list per "keep non-breaking space" setting (0 or 1)
    $variant = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_LISTS[$variant])) {
      $WHITESPACE_LISTS[$variant] = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($WHITESPACE_LISTS[$variant]['NO-BREAK SPACE']);
      }

      $WHITESPACE_LISTS[$variant] = \array_values($WHITESPACE_LISTS[$variant]);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_LIST === null) {
        $BIDI_LIST = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_LIST, '', $str);
    }

    return \str_replace($WHITESPACE_LISTS[$variant], ' ', $str);
  }
3725
3726
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per "input + encoding". "IntlChar::ord()" is used when
   * the intl support flag is set; if it is not available or yields a falsy
   * result, the first (up to four) bytes are decoded by hand.
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $CODE_POINT_CACHE = [];

    // the cache key is built from the untouched input, not from the converted string
    $input = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // only convert if the normalized encoding is still something other than UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($CODE_POINT_CACHE[$cacheKey]) === true) {
      return $CODE_POINT_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CODE_POINT_CACHE[$cacheKey] = $code;
      }
    }

    // manual decoding: unpack() returns a 1-based list of byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      $result = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      $result = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      $result = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode => 0)
      $result = $lead;
    }

    return $CODE_POINT_CACHE[$cacheKey] = $result;
  }
3792
3793
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never creates or replaces variables
   *          in the current scope; the result is always written to $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    if ($parsed === false) {
      return false;
    }

    // a successful parse that produced nothing still counts as a failure
    return !empty($result);
  }
3818
3819
  /**
   * Checks if the "u" pattern modifier (Unicode support in PCRE) is available.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    // probe with an empty pattern; warnings are suppressed on purpose so that a
    // PCRE build without Unicode support simply produces a falsy result
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3829
3830
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a string of decimal digits is a
   * decimal code point, a string of hexadecimal digits is a hexadecimal code
   * point, anything else is treated as a character whose code point is used.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException <p>If the ctype support flag is false.</p>
   *
   * @return string[]
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Both boundaries go through the same conversion. The value is cast to string
    // once, so that the ctype checks, hex_to_int() and ord() all see the same
    // string (non-string scalars would otherwise hit ord() uncast).
    $toCodePoint = static function ($value): int {
      $value = (string)$value;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($value)) {
        return (int)$value;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($value)) {
        return (int)self::hex_to_int($value);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3886
3887
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal numeric entities, the loop below decodes them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

      // repeat until a pass changes nothing (only when multi decoding is requested)
    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3935
3936
  /**
   * Filter a list of strings and return the remaining values as a re-indexed list.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int   $removeShortValues <p>Drop values whose length is less than or equal to this value;
   *                                 null disables the length check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;

      // the emptiness check is only evaluated if the length check did not already reject the value
      if (
          $isTooShort
          ||
          ($removeEmptyValues === true && \trim($candidate) === '')
      ) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
3970
3971
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always compiled with the "u" modifier. If "preg_replace()"
   * fails, the string cast turns its null result into an empty string.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regexReplace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to "ms"
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fall back to the default delimiter if a falsy one was passed
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4001
4002
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4017
4018
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM table is checked in turn; whenever the (remaining)
   * string starts with that BOM, the corresponding number of bytes is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bom => $byteLength) {
      // byte-wise comparison ('CP850'): only a BOM at offset 0 counts
      if (self::strpos($str, $bom, 0, 'CP850') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $byteLength, null, 'CP850');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4043
4044
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Consecutive repetitions of each search string are collapsed into a single
   * occurrence, e.g. "a  b" with the default search string becomes "a b".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) === true ? [$what] : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($needles) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $repeated = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = (string)\preg_replace($repeated, $needle, $str);
    }

    return $str;
  }
4067
4068
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded ("%XX") form of the control characters.</p>
   * @param string $replacement [optional] <p>The string every match is replaced with.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // the hex digits of a percent-encoded byte may be upper or lower case
      // ("%0b" and "%0B" are the same byte), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left to replace: a removal can join the surrounding
    // characters into a new match
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4101
4102
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Delegates to "UTF8::str_replace()" for case-sensitive and to
   * "UTF8::str_ireplace()" for case-insensitive replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4120
4121
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * Delegates to "UTF8::str_replace()" for case-sensitive and to
   * "UTF8::str_ireplace()" for case-insensitive replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4139
4140
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" only accepts a code point or one of the
      // keywords "none" / "long" / "entity", so an arbitrary replacement string
      // must not be handed to it. Invalid sequences are therefore first turned
      // into U+FFFD and then replaced together with every other U+FFFD by the
      // final str_replace() below.
      $save = \mb_substitute_character();
      try {
        \mb_substitute_character(0xFFFD);
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // always restore the previous setting, even if the conversion throws
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4189
4190
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without explicit characters, every trailing character of the Unicode
   * categories "Z" (separators) and "C" (other / control) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string '0' is falsy in PHP but is a perfectly valid character to
    // strip, so it must not fall into the "no characters given" branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '[\pZ\pC]+$';
    } else {
      $chars = \preg_quote((string)$chars, '/');
      $pattern = '[' . $chars . ']+$';
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
4214
4215
  /**
   * Build a regex fragment that matches any of the characters contained in $s.
   *
   * The result is either a single bracket expression or, if longer pieces have
   * to be listed separately, a non-capturing group of alternatives. Results are
   * memoized per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASS_CACHE[$cacheKey])) {
      return $RX_CLASS_CACHE[$cacheKey];
    }

    // index 0 collects the bracket expression, further entries are standalone alternatives
    $alternatives = [$class];

    foreach (self::str_split($s) as $piece) {
      if ($piece === '-') {
        // a dash goes to the front of the bracket expression
        $alternatives[0] = '-' . $alternatives[0];
        continue;
      }

      if (!isset($piece[2])) {
        // at most two bytes long
        $alternatives[0] .= \preg_quote($piece, '/');
        continue;
      }

      if (self::strlen($piece) === 1) {
        $alternatives[0] .= $piece;
        continue;
      }

      $alternatives[] = $piece;
    }

    if ($alternatives[0]) {
      $alternatives[0] = '[' . $alternatives[0] . ']';
    }

    $rx = (\count($alternatives) === 1)
        ? $alternatives[0]
        : '(?:' . \implode('|', $alternatives) . ')';

    $RX_CLASS_CACHE[$cacheKey] = $rx;

    return $rx;
  }
4263
4264
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes one "key - value" line per entry of the support table, wrapped in a "pre" element.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $report . '</pre>';
  }
4279
4280
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The HTML numbered entity.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched if the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needsNormalizing) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4309
4310
  /**
   * Replace every run of $tabLength spaces by one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $indent = \str_repeat(' ', $tabLength);

    return \str_replace($indent, "\t", $str);
  }
4320
4321
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE Unicode support the string is split by a regex; otherwise the
   * bytes are walked manually and grouped into 1 to 4 byte sequences. In the
   * manual mode, bytes that do not form a well-formed sequence are left out.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($str === '') {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // 0xxxxxxx => single byte
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;

          continue;
        }

        // 110xxxxx => two byte sequence
        if (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

          continue;
        }

        // 1110xxxx => three byte sequence
        if (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

          continue;
        }

        // 11110xxx => four byte sequence
        if (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // glue $length characters together per element
      $chunks = \array_chunk($chars, $length);

      return \array_map(
          function ($chunk) {
            return \implode('', $chunk);
          },
          $chunks
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4438
4439
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::lcfirst(self::trim($str), $encoding);

    // leading dashes / underscores are dropped without capitalizing what follows
    $str = (string)\preg_replace('/^[-_]+/', '', $str);

    // separator run + optional next character => upper-cased next character (or nothing)
    $upperAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };

    // digit run + optional next character => the whole match upper-cased
    $upperAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);
  }
4476
4477
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains $needle.</p>
   */
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    // position 0 is a valid hit, only a strict false means "not found"
    return $position !== false;
  }
4497
4498
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty haystack or an empty needle list always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains all $needles.</p>
   */
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);

      // the first miss decides the result
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
4528
4529
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty needle list always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains any of the $needles.</p>
   */
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (empty($needles)) {
      return false;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);

      // the first hit decides the result
      if ($found === true) {
        return true;
      }
    }

    return false;
  }
4555
4556
  /**
   * Converts the string into its dash-separated form by delegating to
   * "str_delimit()" with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The result of "str_delimit($str, '-', $encoding)".</p>
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dasherized = self::str_delimit($str, '-', $encoding);

    return $dasherized;
  }
4570
4571
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string (inserted literally).</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // "\p{Lu}" covers every Unicode uppercase letter, not only ASCII "A-Z",
    // so e.g. "fooÄbar" is split the same way as "fooAbar".
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // A callback inserts the delimiter verbatim; passing it as a replacement
    // string would expand sequences such as "$0" or "\1" inside the delimiter.
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function () use ($delimiter): string {
          return $delimiter;
        },
        $str
    );
  }
4593
4594
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed detect-order and finally an "iconv()"
   * round-trip over the encodings loaded via "getData('encodings')".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // evaluate each detector only once and branch on its result
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // only accept an array here, so the loop below never iterates over a scalar
      $encodingsData = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodingsData) ? $encodingsData : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure; the strict comparison skips that case
      // (hashing the result first would raise a TypeError under strict_types)
      // and an unchanged round-trip means the input is valid in this encoding.
      if ($converted === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
4712
4713
  /**
   * Check if the string ends with the given substring (byte-wise, case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>False if either argument is empty, otherwise whether $haystack ends with $needle.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    // Empty input never counts as a match here.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Take as many trailing bytes as the needle has and compare them exactly.
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
4729
4730
  /**
   * Checks whether the string ends with at least one of the given substrings.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>True if "str_ends_with()" succeeds for any entry, false otherwise (also for an empty list).</p>
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if (\count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $suffix) {
      $matches = self::str_ends_with($str, $suffix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4754
4755
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string <p>$str unchanged if "str_starts_with()" reports a match, otherwise $substring . $str.</p>
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    // Already prefixed: hand the input back untouched.
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
4772
4773
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string <p>$str unchanged if "str_ends_with()" reports a match, otherwise $str . $substring.</p>
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    // Already suffixed: hand the input back untouched.
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
4789
4790
  /**
   * Capitalizes the first word of the string, replaces underscores with
   * spaces, and strips '_id'.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The trimmed result with its first character upper-cased via "ucfirst()".</p>
   */
  public static function str_humanize($str): string
  {
    // Order matters: "_id" has to be removed before the remaining "_" become spaces.
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $humanized = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    $humanized = self::trim($humanized);

    return self::ucfirst($humanized);
  }
4814
4815
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>False if either argument is empty, otherwise whether the tail of $haystack equals $needle ignoring case.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // NOTE(review): the tail is cut by the needle's byte length; this presumably
    // assumes both spellings occupy the same number of bytes - confirm for
    // characters whose upper/lower forms differ in byte size.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4835
4836
  /**
   * Checks whether the string ends with at least one of the given substrings.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>True if "str_iends_with()" succeeds for any entry, false otherwise (also for an empty list).</p>
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if (\count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $suffix) {
      $matches = self::str_iends_with($str, $suffix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4860
4861
  /**
   * Case-insensitive lookup of the first occurrence of $needle in the string.
   * Thin wrapper that forwards all arguments to "stripos()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::stripos($str, $needle, $offset, $encoding);

    return $position;
  }
4882
4883
  /**
   * Case-insensitive lookup of the last occurrence of $needle in the string.
   * Thin wrapper that forwards all arguments to "strripos()". Offsets may be
   * negative to count from the last character in the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::strripos($str, $needle, $offset, $encoding);

    return $position;
  }
4905
4906
  /**
   * Case-sensitive lookup of the first occurrence of $needle in the string.
   * Thin wrapper that forwards all arguments to "strpos()".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::strpos($str, $needle, $offset, $encoding);

    return $position;
  }
4927
4928
  /**
   * Case-sensitive lookup of the last occurrence of $needle in the string.
   * Thin wrapper that forwards all arguments to "strrpos()". Offsets may be
   * negative to count from the last character in the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $position = self::strrpos($str, $needle, $offset, $encoding);

    return $position;
  }
4950
4951
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>$str unchanged if $index lies beyond its length, otherwise the string with $substring spliced in.</p>
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $length = self::strlen($str, $encoding);

    // An index past the end leaves the input as it is.
    if ($index > $length) {
      return $str;
    }

    // head = everything before $index, tail = everything from $index on
    return self::substr($str, 0, $index, $encoding)
           . $substring
           . self::substr($str, $index, $length, $encoding);
  }
4974
4975
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search terms are turned into quoted, case-insensitive unicode regex
   * patterns; the replacement text is always inserted literally.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search term never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string, or an array of replacements
   *                       paired with the search terms by position.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build the patterns into a fresh array instead of rewriting $search by
    // reference, so no dangling foreach reference is left behind.
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // this pattern can never match, so an empty search term is a no-op
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" expands "$n" and "\n" in replacement strings; escape
    // them so the replacement is inserted verbatim like "str_ireplace()" does.
    $escapeReplacement = static function ($value): string {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };

    $replacement = \is_array($replace)
        ? \array_map($escapeReplacement, $replace)
        : $escapeReplacement($replace);

    // $count is filled directly by "preg_replace()" (passed by reference)
    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5018
5019
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>False if either argument is empty, otherwise whether "stripos()" finds $needle at index 0.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A match at position 0 (and only there) means "starts with".
    $position = self::stripos($haystack, $needle);

    return $position === 0;
  }
5039
5040
  /**
   * Checks whether the string begins with at least one of the given substrings.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>True if "str_istarts_with()" succeeds for any entry, false otherwise (also for empty input).</p>
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    // An empty string or an empty list cannot produce a match.
    if ($str === '' || \count($substrings) === 0) {
      return false;
    }

    foreach ($substrings as $prefix) {
      $matches = self::str_istarts_with($str, $prefix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5068
5069
  /**
   * Gets the substring after the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text behind the separator, or an empty string if an argument is empty or the separator is not found.</p>
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // forward $encoding, so the lookup and the cut below use the same charset
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // "substr()" may yield false, which the declared string return type rejects
    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5100
5101
  /**
   * Gets the substring after the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text behind the separator, or an empty string if an argument is empty or the separator is not found.</p>
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // forward $encoding, so the lookup and the cut below use the same charset
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // "substr()" may yield false, which the declared string return type rejects
    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5132
5133
  /**
   * Gets the substring before the first occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text in front of the separator, or an empty string if an argument is empty or the separator is not found.</p>
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // forward $encoding, so the lookup and the cut below use the same charset
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // "substr()" may yield false, which the declared string return type rejects
    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
5164
5165
  /**
   * Gets the substring before the last occurrence of a separator
   * (the separator is searched case-insensitively).
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text in front of the separator, or an empty string if an argument is empty or the separator is not found.</p>
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if (
        $separator === ''
        ||
        $str === ''
    ) {
      return '';
    }

    // forward $encoding, so the lookup and the cut below use the same charset
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // "substr()" may yield false, which the declared string return type rejects
    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
5196
5197
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   * The lookup is delegated to "stristr()".
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The part reported by "stristr()", or an empty string if an argument is empty or nothing was found.</p>
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    // "stristr()" signals "not found" with false; callers get '' instead.
    return $found === false ? '' : $found;
  }
5229
5230
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   * The lookup is delegated to "strrichr()".
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The part reported by "strrichr()", or an empty string if an argument is empty or nothing was found.</p>
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    // A false result is mapped to '' for the caller.
    return $found === false ? '' : $found;
  }
5257
5258
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        [optional] <p>Number of characters to retrieve from the end. Default: 1</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The trailing characters, or an empty string if $n is not positive.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // "substr()" may yield false, which the declared string return type rejects
    return (string)self::substr($str, -$n, null, $encoding);
  }
5275
5276
  /**
   * Limit the number of characters in a string, but cut at a word boundary
   * (a space) where possible, and append $strAddOn to a shortened result.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>$str unchanged if it is not longer than $length, otherwise the shortened string plus $strAddOn.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ($str === '') {
      return '';
    }

    // Short enough: nothing to cut.
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The limit falls right behind a word: drop that space and finish.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length, $encoding);

    // Everything in front of the last space, i.e. the text without the
    // (possibly cut-off) last word; empty if there is no space at all.
    $lastSpace = \strrpos($truncated, ' ');
    $withoutLastWord = $lastSpace === false ? '' : (string)\substr($truncated, 0, $lastSpace);

    if ($withoutLastWord === '') {
      // a single long word: fall back to a hard cut
      return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
5313
5314
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The shared leading characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the prefix cannot be longer than the shorter input
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = self::substr($str, $i, 1, $encoding);

      // strict comparison: with "==" two different characters whose bytes both
      // look numeric (possible in non-UTF-8 encodings) would count as equal
      if ($char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
5340
5341
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if either input is empty.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if (!$strLength || !$otherLength) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per table cell.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // index in $str directly behind that match
    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1]
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // loop-invariant for the inner loop
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        // strict comparison: with "==" two different characters whose bytes both
        // look numeric (possible in non-UTF-8 encodings) would count as equal
        if ($strChar !== $otherChars[$j]) {
          // cells are pre-filled with 0, nothing to reset
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    // "substr()" may yield false, which the declared string return type rejects
    return (string)self::substr(
        $str,
        $end - $len,
        $len,
        $encoding
    );
  }
5395
5396
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * The strings are compared character by character from their ends, and the
   * comparison stops at the first position where the characters differ.
   *
   * @param string $str      <p>First string for comparison.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The shared suffix, or an empty string if there is none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // substr() may yield false; normalise to string before comparing / concatenating.
      $char = (string)self::substr($str, -$i, 1, $encoding);
      $otherChar = (string)self::substr($otherStr, -$i, 1, $encoding);

      if ($char !== $otherChar) {
        break;
      }

      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
5422
5423
  /**
   * Checks whether $str matches the supplied regex pattern.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
   *
   * @return bool <p>True if the pattern matches $str, false otherwise.</p>
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    $regex = '/' . $pattern . '/u';

    return (bool)\preg_match($regex, $str);
  }
5439
5440
  /**
   * Tells whether a character exists at the given index.
   *
   * A negative offset counts from the last character of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not the index exists.</p>
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);

    // negative offsets may reach back at most $charCount characters
    return $offset < 0
        ? ($charCount >= \abs($offset))
        : ($charCount > $offset);
  }
5463
5464
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The character at the specified index.</p>
   *
   * @throws \OutOfBoundsException <p>If the positive or negative offset does not exist.</p>
   */
  public static function str_offset_get(string $str, int $offset, string $encoding = 'UTF-8'): string
  {
    // Measure with the same encoding that is used for the extraction below,
    // otherwise the bounds check and substr() can disagree.
    $length = self::strlen($str, $encoding);

    if (
        ($offset >= 0 && $length <= $offset)
        ||
        $length < \abs($offset)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr($str, $offset, 1, $encoding);
  }
5493
5494
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string     $str        <p>The input string. An empty input is returned as an empty string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.
   *                               An empty pad string leaves the input unchanged.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string <strong>Returns the padded string</strong>
   *
   * @throws \InvalidArgumentException <p>If $pad_type is neither an integer nor one of the supported strings.</p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the string aliases into the native STR_PAD_* constants.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } else if ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } else if ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if (
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string, $encoding);

    // Without pad characters nothing can be added; this also prevents the
    // division by zero in the repeat-count calculation below.
    if ((int)$ps_length < 1) {
      return $str;
    }

    $diff = ($pad_length - $str_length);

    // Decide how many pad characters go on each side.
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $leftLength = $diff;
        $rightLength = 0;
        break;

      case STR_PAD_BOTH:
        // an odd remainder goes to the right-hand side
        $leftLength = (int)\floor($diff / 2);
        $rightLength = $diff - $leftLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $leftLength = 0;
        $rightLength = $diff;
    }

    $pre = '';
    if ($leftLength > 0) {
      // repeat often enough to cover the side, then cut to the exact length
      $pre = \str_repeat($pad_string, (int)\ceil($leftLength / $ps_length));
      $pre = (string)self::substr($pre, 0, $leftLength, $encoding);
    }

    $post = '';
    if ($rightLength > 0) {
      $post = \str_repeat($pad_string, (int)\ceil($rightLength / $ps_length));
      $post = (string)self::substr($post, 0, $rightLength, $encoding);
    }

    return $pre . $str . $post;
  }
5565
5566
  /**
   * Pads both sides of the string so that it reaches the given length.
   *
   * The missing length is split in half; the left side receives the
   * rounded-down half and the right side the rounded-up half.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with padding applied.</p>
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $leftPadding = (int)\floor($missing / 2);
    $rightPadding = (int)\ceil($missing / 2);

    return self::apply_padding($str, $leftPadding, $rightPadding, $padStr, $encoding);
  }
5583
5584
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with left padding.</p>
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure with the caller's encoding, exactly as str_pad_both() does.
    $padding = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $padding, 0, $padStr, $encoding);
  }
5599
5600
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with right padding.</p>
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure with the caller's encoding, exactly as str_pad_both() does.
    $padding = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $padding, $padStr, $encoding);
  }
5615
5616
  /**
   * Strips HTML tags from the string by delegating to "strip_tags()".
   *
   * @param string      $str           <p>The input string.</p>
   * @param string|null $allowableTags [optional] <p>Tags which should not be stripped,
   *                                   e.g. "&lt;b&gt;&lt;i&gt;". Default: null
   *                                   </p>
   *
   * @return string <p>The string without the stripped tags.</p>
   */
  public static function str_remove_html(string $str, string $allowableTags = null): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
5630
5631
  /**
   * Remove all breaks [<br> | \r\n | \r | \n] from the string.
   *
   * A "\r\n" pair counts as one break, so it is replaced once. Only real
   * "br" elements (in any letter case, with optional attributes or a closing
   * slash) are matched; other tags that merely start with "br" are kept.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string <p>The string with every break replaced by $replacement.</p>
   */
  public static function str_remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" has to come first so that a CRLF pair is consumed as one break.
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
5643
5644
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String without the prefix $substring.</p>
   */
  public static function str_remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr(
        $str,
        self::strlen($substring, $encoding),
        null,
        $encoding
    );
  }
5667
5668
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String without the suffix $substring.</p>
   */
  public static function str_remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    $keepLength = self::strlen($str, $encoding) - self::strlen($substring, $encoding);

    // Cut with the same encoding that was used to compute the length;
    // the cast keeps the declared string return type if substr() yields false.
    return (string)self::substr(
        $str,
        0,
        $keepLength,
        $encoding
    );
  }
5690
5691
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>The string to be repeated.</p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           Has to be greater than or equal to 0; with 0 the
   *                           function returns an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
5715
5716
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle).
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value for found search values.
   *                       An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack).
   *                       If it is an array, every entry is processed and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>A string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
5749
5750
  /**
   * Replaces $search with $replacement when it occurs at the beginning of the string.
   *
   * The search term is quoted for regex use and anchored with "^"; backslashes in
   * the replacement are doubled before both are handed to self::regexReplace().
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = '^' . \preg_quote($search, '/');
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regexReplace($str, $anchoredPattern, $escapedReplacement);
  }
5767
5768
  /**
   * Replaces $search with $replacement when it occurs at the ending of the string.
   *
   * The search term is quoted for regex use and anchored with "$"; backslashes in
   * the replacement are doubled before both are handed to self::regexReplace().
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = \preg_quote($search, '/') . '$';
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regexReplace($str, $anchoredPattern, $escapedReplacement);
  }
5785
5786
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that takes its place.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first hit replaced, or the unchanged subject if there is no hit.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstHit = self::strpos($subject, $search);

    // nothing found -> nothing to replace
    if ($firstHit === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
5805
5806
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    // shuffle the character positions, then collect the characters in that order
    $positions = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($positions);

    $pieces = [];
    foreach ($positions as $position) {
      $pieces[] = self::substr($str, $position, 1);
    }

    return \implode('', $pieces);
  }
5828
5829
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string   $str      <p>The input string.</p>
   * @param int      $start    <p>Initial index from which to begin extraction.</p>
   * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string   $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The extracted substring.</p>
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8'): string
  {
    if ($end === null) {
      // no end given: the full length is always enough to reach the end of the string
      $length = self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      // negative end: count back from the end, measured in the caller's encoding
      $length = self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr($str, $start, $length, $encoding);
  }
5856
5857
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become underscores, digits are surrounded by
   * underscores, the letters A-Z are lower-cased and prefixed with an
   * underscore, and runs of underscores are collapsed.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String in snake_case.</p>
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // a digit or an upper-case letter; "|" must stay outside the character
        // class, otherwise literal pipes would be matched as well
        '/(\d|[A-Z])/u',
        function ($matches) use ($encoding) {
          $match = $matches[1];
          $matchInt = (int)$match;

          // digits are separated on both sides
          if ((string)$matchInt == $match) {
            return '_' . $match . '_';
          }

          // upper-case letters start a new word
          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/',        // convert spaces to "_"
            '/^\s+|\s+$/',  // trim leading & trailing spaces
            '/_+/',         // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
5904
5905
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values
      $flipped = \array_flip($codePoints);
      $codePoints = \array_flip($flipped);
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
5930
5931
  /**
   * Split a string into an array.
   *
   * The string is cut with self::GRAPHEME_CLUSTER_RX and the resulting pieces
   * are joined into chunks of $len pieces each. An array input is processed
   * entry by entry.
   *
   * @param string|string[] $str <p>The input string (or an array of input strings).</p>
   * @param int             $len <p>Number of pieces per chunk. Default: 1</p>
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    if ($len < 1) {
      // invalid chunk sizes are left to the native function
      return \str_split($str, $len);
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $pieces = $matches[0];

    if ($len === 1) {
      return $pieces;
    }

    // join every $len consecutive pieces into one chunk
    $chunks = [];
    foreach (\array_chunk($pieces, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
5979
5980
  /**
   * Splits the string at every occurrence of $pattern and returns the parts
   * as an array of strings. An optional integer $limit will truncate the
   * results.
   *
   * NOTE: $pattern is passed through "preg_quote()", so it is matched literally.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>The separator with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] <p>An array of strings; empty if $limit is 0 or the split fails.</p>
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // this->split errors when supplied an empty pattern in < PHP 5.4.13
    // and current versions of HHVM (3.8 and below)
    if ($pattern === '') {
      return [$str];
    }

    // this->split returns the remaining unsplit string in the last index when
    // supplying a limit, so ask for one extra part and drop it afterwards
    if ($limit > 0) {
      ++$limit;
    } else {
      $limit = -1;
    }

    $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

    // preg_split() returns false on failure (e.g. malformed UTF-8 input);
    // never hand that to count() / array_pop() or the array return type.
    if ($array === false) {
      return [];
    }

    if ($limit > 0 && \count($array) === $limit) {
      \array_pop($array);
    }

    return $array;
  }
6019
6020
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>True if $haystack begins with $needle.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // compare only the leading bytes of the haystack with the needle
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6040
6041
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the substrings.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // an empty string or an empty list can never produce a match
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6069
6070
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The position of the separator is taken from self::str_index_first().
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text after the separator, or an empty string if either
   *                argument is empty or the separator is not found.</p>
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $offset = self::str_index_first($str, $separator);
    if ($offset === false) {
      return '';
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6101
6102
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The position of the separator is taken from self::str_index_last().
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text after the separator, or an empty string if either
   *                argument is empty or the separator is not found.</p>
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $offset = self::str_index_last($str, $separator);
    if ($offset === false) {
      return '';
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6133
6134
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The position of the separator is taken from self::str_index_first().
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text before the separator, or an empty string if either
   *                argument is empty or the separator is not found.</p>
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $offset = self::str_index_first($str, $separator);
    if ($offset === false) {
      return '';
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6165
6166
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The position of the separator is taken from self::str_index_last().
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The text before the separator, or an empty string if either
   *                argument is empty or the separator is not found.</p>
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $offset = self::str_index_last($str, $separator);
    if ($offset === false) {
      return '';
    }

    // The cast keeps the declared string return type even if substr() yields false.
    return (string)self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
  }
6197
6198
  /**
6199
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
6200
   *
6201
   * @param string $str          <p>The input string.</p>
6202
   * @param string $needle       <p>The string to look for.</p>
6203
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6204
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
6205
   *
6206
   * @return string
6207
   */
6208 2
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Without a haystack or a needle there is nothing to search.
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // A missing needle is reported as false by strstr(); callers get '' instead.
    return $found === false ? '' : $found;
  }
6230
6231
  /**
6232
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
6233
   *
6234
   * @param string $str          <p>The input string.</p>
6235
   * @param string $needle       <p>The string to look for.</p>
6236
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6237
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
6238
   *
6239
   * @return string
6240
   */
6241 2
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Without a haystack or a needle there is nothing to search.
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // A false result from strrchr() is turned into '' for the caller.
    return $found === false ? '' : $found;
  }
6258
6259
  /**
6260
   * Surrounds $str with the given substring.
6261
   *
6262
   * @param string $str
6263
   * @param string $substring <p>The substring to add to both sides.</p>
6264
   *
6265
   * @return string <p>String with the substring both prepended and appended.</p>
6266
   */
6267 5
  public static function str_surround(string $str, string $substring): string
  {
    // Prefix and suffix are the same substring.
    return $substring . $str . $substring;
  }
6271
6272
  /**
6273
   * Returns a trimmed string with the first letter of each word capitalized.
6274
   * Also accepts an array, $ignore, allowing you to list words not to be
6275
   * capitalized.
6276
   *
6277
   * @param string              $str
6278
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
6279
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
6280
   *
6281
   * @return string <p>The titleized string.</p>
6282
   */
6283 5
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // Every run of non-whitespace characters is treated as one word.
    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          // Words listed in $ignore are returned untouched (exact, case-sensitive match).
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Lowercase the word, then uppercase its first character; the same
          // encoding is handed to both steps so they agree on character boundaries.
          return self::str_upper_first(self::strtolower($match[0], $encoding), $encoding);
        },
        $str
    );
  }
6301
6302
  /**
6303
   * Returns a trimmed string in proper title case.
6304
   *
6305
   * Also accepts an array, $ignore, allowing you to list words not to be
6306
   * capitalized.
6307
   *
6308
   * Adapted from John Gruber's script.
6309
   *
6310
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
6311
   *
6312
   * @param string $str
6313
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
6314
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6315
   *
6316
   * @return string <p>The titleized string.</p>
6317
   */
6318 35
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // The built-in "small words" are regex fragments. Entries from $ignore are
    // appended to the same list and inserted into the patterns below unescaped,
    // so they are interpreted as regex fragments too.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // Input without any lowercase character is lowercased first, using the
    // caller's encoding like every other case conversion in this method.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6446
6447
  /**
6448
   * Get a binary representation of a specific string.
6449
   *
6450
   * @param string $str <p>The input string.</p>
6451
   *
6452
   * @return string
6453
   */
6454 1
  public static function str_to_binary(string $str): string
  {
    // An empty input has always produced "0" (base_convert('', 16, 2) === '0').
    if ($str === '') {
      return '0';
    }

    // Convert one hex digit (4 bits) at a time. Feeding the whole hex string to
    // base_convert() loses precision once the value no longer fits the internal
    // numeric type, i.e. for inputs longer than a few bytes.
    $bits = '';
    foreach (\str_split(\bin2hex($str)) as $hexDigit) {
      $bits .= \str_pad(\base_convert($hexDigit, 16, 2), 4, '0', \STR_PAD_LEFT);
    }

    // Leading zeros are dropped, matching what a numeric base conversion yields.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
6460
6461
  /**
6462
   * @param string   $str
6463
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
6464
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
6465
   *
6466
   * @return string[]
6467
   */
6468 17
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      // An empty input is either "no lines" or one empty line, depending on the filter.
      return $removeEmptyValues === true ? [] : [''];
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      // preg_split() fails with the "u" modifier on invalid UTF-8. The pattern only
      // contains ASCII line breaks, so a byte-wise split is a safe fallback.
      $lines = \preg_split("/[\r\n]{1,2}/", $str);
      if ($lines === false) {
        // Last resort: hand back the input as a single line rather than returning false.
        $lines = [$str];
      }
    }

    // No filtering requested: return the raw split.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
6496
6497
  /**
6498
   * Convert a string into an array of words.
6499
   *
6500
   * @param string   $str
6501
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
6502
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
6503
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
6504
   *
6505
   * @return string[]
6506
   */
6507 10
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      // An empty input is either "no parts" or one empty part, depending on the filter.
      return $removeEmptyValues === true ? [] : [''];
    }

    // Character class describing what counts as part of a word (letters plus $charList).
    $charList = self::rxClass($charList, '\pL');

    // With PREG_SPLIT_DELIM_CAPTURE the result alternates: non-word text at even
    // indexes, the captured words at odd indexes.
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      // preg_split() failed (e.g. invalid UTF-8 with the "u" modifier). Treat the
      // input as one non-word segment instead of returning false from an array method.
      $parts = [$str];
    }

    // No filtering requested: return the raw split.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    return self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );
  }
6537
6538
  /**
6539
   * alias for "UTF8::to_ascii()"
6540
   *
6541
   * @see UTF8::to_ascii()
6542
   *
6543
   * @param string $str
6544
   * @param string $unknown
6545
   * @param bool   $strict
6546
   *
6547
   * @return string
6548
   */
6549 7
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to to_ascii().
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6553
6554
  /**
6555
   * Truncates the string to a given length. If $substring is provided, and
6556
   * truncating occurs, the string is further truncated so that the substring
6557
   * may be appended without exceeding the desired length.
6558
   *
6559
   * @param string $str
6560
   * @param int    $length    <p>Desired length of the truncated string.</p>
6561
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
6562
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6563
   *
6564
   * @return string <p>String after truncating.</p>
6565
   */
6566 22
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // $str is untyped in the signature, so normalize it to a string first.
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // Already short enough: nothing to cut, nothing to append.
    if ($length >= self::strlen($input, $encoding)) {
      return $input;
    }

    // Reserve room for the suffix so the result fits into $length.
    // NOTE(review): if $substring is longer than $length this becomes negative and is
    // passed to substr() as-is - confirm that this is intended.
    $keep = $length - self::strlen($substring, $encoding);

    return self::substr($input, 0, $keep, $encoding) . $substring;
  }
6587
6588
  /**
6589
   * Truncates the string to a given length, while ensuring that it does not
6590
   * split words. If $substring is provided, and truncating occurs, the
6591
   * string is further truncated so that the substring may be appended without
6592
   * exceeding the desired length.
6593
   *
6594
   * @param string $str
6595
   * @param int    $length    <p>Desired length of the truncated string.</p>
6596
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
6597
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6598
   *
6599
   * @return string <p>String after truncating.</p>
6600
   */
6601 23
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // Already short enough: return the input untouched.
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Reserve room for the suffix.
    $length -= self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $length, $encoding);

    // Look for a space starting one character before the cut position; when it is not
    // found exactly at the cut, the cut is treated as falling inside a word.
    // The comparison is intentionally loose, as in the original logic.
    $spaceAtCut = self::strpos($str, ' ', $length - 1, $encoding);
    if ($spaceAtCut != $length) {
      // Back up to the last space inside the kept part.
      $lastSpace = self::strrpos($head, ' ', 0, $encoding);

      if ($lastSpace !== false || $spaceAtCut !== false) {
        $head = self::substr($head, 0, (int)$lastSpace, $encoding);
      }
    }

    return $head . $substring;
  }
6628
6629
  /**
6630
   * Returns a lowercase and trimmed string separated by underscores.
6631
   * Underscores are inserted before uppercase characters (with the exception
6632
   * of the first character of the string), and in place of spaces as well as
6633
   * dashes.
6634
   *
6635
   * @param string $str
6636
   *
6637
   * @return string <p>The underscored string.</p>
6638
   */
6639 16
  public static function str_underscored(string $str): string
  {
    // Delegates to str_delimit() with "_" as the delimiter.
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
6643
6644
  /**
6645
   * Returns an UpperCamelCase version of the supplied string. It trims
6646
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
6647
   * and underscores, and removes spaces, dashes, underscores.
6648
   *
6649
   * @param string $str      <p>The input string.</p>
6650
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6651
   *
6652
   * @return string <p>String in UpperCamelCase.</p>
6653
   */
6654 13
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // Step 1: camelize the input.
    $camelized = self::str_camelize($str, $encoding);

    // Step 2: uppercase the first character of the camelized string.
    return self::str_upper_first($camelized, $encoding);
  }
6658
6659
  /**
6660
   * alias for "UTF8::ucfirst()"
6661
   *
6662
   * @see UTF8::ucfirst()
6663
   *
6664
   * @param string $str
6665
   * @param string $encoding
6666
   * @param bool   $cleanUtf8
6667
   *
6668
   * @return string
6669
   */
6670 57
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to ucfirst().
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
6674
6675
  /**
6676
   * Counts number of words in the UTF-8 string.
6677
   *
6678
   * @param string $str      <p>The input string.</p>
6679
   * @param int    $format   [optional] <p>
6680
   *                         <strong>0</strong> => return a number of words (default)<br>
6681
   *                         <strong>1</strong> => return an array of words<br>
6682
   *                         <strong>2</strong> => return an array of words with word-offset as key
6683
   *                         </p>
6684
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
6685
   *
6686
   * @return string[]|int <p>The number of words in the string</p>
6687
   */
6688 1
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // str_to_words() yields an alternating list; the words sit at the odd indexes.
    $parts = self::str_to_words($str, $charlist);
    $total = \count($parts);

    if ($format === 1) {
      // Plain list of words.
      $words = [];
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      // Words keyed by their offset in the input.
      $words = [];
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[$position] = $parts[$idx];
        // Advance past this word and the non-word segment that follows it.
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    // Any other format: the number of words.
    return ($total - 1) / 2;
  }
6718
6719
  /**
6720
   * Case-insensitive string comparison.
6721
   *
6722
   * INFO: Case-insensitive version of UTF8::strcmp()
6723
   *
6724
   * @param string $str1
6725
   * @param string $str2
6726
   *
6727
   * @return int <p>
6728
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6729
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6730
   *             <strong>0</strong> if they are equal.
6731
   *             </p>
6732
   */
6733 19
  public static function strcasecmp(string $str1, string $str2): int
  {
    // Case-fold both operands, then compare them with the case-sensitive strcmp().
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
6737
6738
  /**
6739
   * alias for "UTF8::strstr()"
6740
   *
6741
   * @see UTF8::strstr()
6742
   *
6743
   * @param string $haystack
6744
   * @param string $needle
6745
   * @param bool   $before_needle
6746
   * @param string $encoding
6747
   * @param bool   $cleanUtf8
6748
   *
6749
   * @return string|false
6750
   */
6751 1
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged to strstr().
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
6755
6756
  /**
6757
   * Case-sensitive string comparison.
6758
   *
6759
   * @param string $str1
6760
   * @param string $str2
6761
   *
6762
   * @return int  <p>
6763
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
6764
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
6765
   *              <strong>0</strong> if they are equal.
6766
   *              </p>
6767
   */
6768 22
  public static function strcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; skip the normalization work.
    if ($str1 === $str2) {
      return 0;
    }

    // Compare canonically decomposed (NFD) forms so that equivalent sequences
    // compare as equal.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalizer::normalize() returns false when it cannot normalize the input;
    // passing false to \strcmp() would throw under strict types, so fall back to
    // the raw string for that operand.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
6776
6777
  /**
6778
   * Find length of initial segment not matching mask.
6779
   *
6780
   * @param string $str
6781
   * @param string $charList
6782
   * @param int    $offset
6783
   * @param int    $length
6784
   *
6785
   * @return int|null
6786
   */
6787 15
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Restrict the search to the requested window, if any.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    if ($str === '') {
      return null;
    }

    // Capture (non-greedy) everything up to the first character from the mask.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // No mask character present: the whole string is the initial segment.
    return self::strlen($str);
  }
6811
6812
  /**
6813
   * alias for "UTF8::stristr()"
6814
   *
6815
   * @see UTF8::stristr()
6816
   *
6817
   * @param string $haystack
6818
   * @param string $needle
6819
   * @param bool   $before_needle
6820
   * @param string $encoding
6821
   * @param bool   $cleanUtf8
6822
   *
6823
   * @return string|false
6824
   */
6825 1
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged to stristr().
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
6829
6830
  /**
6831
   * Create a UTF-8 string from code points.
6832
   *
6833
   * INFO: opposite to UTF8::codepoints()
6834
   *
6835
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
6836
   *
6837
   * @return string <p>UTF-8 encoded string.</p>
6838
   */
6839 2
  public static function string(array $array): string
  {
    // Map every code point through self::chr(); the callable-array form is kept so the
    // helper is invoked exactly as before.
    $chars = \array_map([self::class, 'chr'], $array);

    // Join the resulting characters into one string.
    return \implode('', $chars);
  }
6852
6853
  /**
6854
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
6855
   *
6856
   * @param string $str <p>The input string.</p>
6857
   *
6858
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
6859
   */
6860 3
  public static function string_has_bom(string $str): bool
  {
    // The keys of self::$BOM are the BOM byte sequences; the byte lengths stored as
    // values are not needed for this check.
    foreach (\array_keys(self::$BOM) as $bom) {
      $startsWithBom = \strpos($str, $bom) === 0;
      if ($startsWithBom) {
        return true;
      }
    }

    return false;
  }
6870
6871
  /**
6872
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
6873
   *
6874
   * @link http://php.net/manual/en/function.strip-tags.php
6875
   *
6876
   * @param string $str             <p>
6877
   *                                The input string.
6878
   *                                </p>
6879
   * @param string $allowable_tags  [optional] <p>
6880
   *                                You can use the optional second parameter to specify tags which should
6881
   *                                not be stripped.
6882
   *                                </p>
6883
   *                                <p>
6884
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
6885
   *                                can not be changed with allowable_tags.
6886
   *                                </p>
6887
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
6888
   *
6889
   * @return string <p>The stripped string.</p>
6890
   */
6891 2
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // Optionally clean the input via self::clean() before the tags are stripped.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
6903
6904
  /**
6905
   * Strip all whitespace characters. This includes tabs and newline
6906
   * characters, as well as multibyte whitespace such as the thin space
6907
   * and ideographic space.
6908
   *
6909
   * @param string $str
6910
   *
6911
   * @return string
6912
   */
6913 24
  public static function strip_whitespace(string $str): string
  {
    // Remove every run of [[:space:]] characters; an empty input short-circuits.
    return $str === ''
        ? ''
        : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
6921
6922
  /**
6923
   * Finds position of first occurrence of a string within another, case insensitive.
6924
   *
6925
   * @link http://php.net/manual/en/function.mb-stripos.php
6926
   *
6927
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
6928
   * @param string $needle    <p>The string to find in haystack.</p>
6929
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
6930
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6931
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6932
   *
6933
   * @return int|false <p>
6934
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
6935
   *                   or false if needle is not found.
6936
   *                   </p>
6937
   */
6938 71
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // The two common encoding names skip normalization.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the feature-detection table is populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
6970
6971
  /**
6972
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
6973
   *
6974
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
6975
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
6976
   * @param bool   $before_needle  [optional] <p>
6977
   *                               If <b>TRUE</b>, stristr() returns the part of the
6978
   *                               haystack before the first occurrence of the needle (excluding the needle).
6979
   *                               </p>
6980
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
6981
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
6982
   *
6983
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
6984
   */
6985 19
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Case-insensitive variant of strstr(): returns the part of $haystack starting at
    // (or, with $before_needle, ending before) the first occurrence of $needle.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // At this point the needle can only be empty if self::clean() reduced it to ''.
    // A strict comparison is required: the former "!$needle" check was also true
    // for the perfectly valid needle "0" and returned the whole haystack for it.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII input can be handled by the native byte-based function
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
7046
7047
  /**
7048
   * Get the string length, not the byte-length!
7049
   *
7050
   * @link     http://php.net/manual/en/function.mb-strlen.php
7051
   *
7052
   * @param string $str       <p>The string being checked for length.</p>
7053
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
7054
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7055
   *
7056
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
7057
   *             character counted as +1)</p>
7058
   */
7059 374
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // Counts characters (not bytes) by trying the available extensions one after another.
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 'ASCII' and 'CP850' are measured as 8-bit data
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // non UTF-8 input without mbstring: warn if nothing can handle it, otherwise try iconv first
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // "grapheme_strlen()" can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: every match of "." in unicode mode is one character
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
7152
7153
  /**
7154
   * Get string length in byte.
7155
   *
7156
   * @param string $str
7157
   *
7158
   * @return int
7159
   */
7160 101
  public static function strlen_in_byte(string $str): int
  {
    // Returns the length of $str in bytes.
    //
    // Run the same support detection guard as the other public entry points,
    // so that the "mbstring_func_overload" flag is populated before it is read
    // when this method happens to be the first one called.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // with function overloading "\strlen" no longer counts bytes, so ask for an 8-bit charset explicitly
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
7170
7171
  /**
7172
   * Case insensitive string comparisons using a "natural order" algorithm.
7173
   *
7174
   * INFO: natural order version of UTF8::strcasecmp()
7175
   *
7176
   * @param string $str1 <p>The first string.</p>
7177
   * @param string $str2 <p>The second string.</p>
7178
   *
7179
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
7180
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
7181
   *             <strong>0</strong> if they are equal
7182
   */
7183 1
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // case-fold both operands first, then delegate to the natural order comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
7187
7188
  /**
7189
   * String comparisons using a "natural order" algorithm
7190
   *
7191
   * INFO: natural order version of UTF8::strcmp()
7192
   *
7193
   * @link  http://php.net/manual/en/function.strnatcmp.php
7194
   *
7195
   * @param string $str1 <p>The first string.</p>
7196
   * @param string $str2 <p>The second string.</p>
7197
   *
7198
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
7199
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
7200
   *             <strong>0</strong> if they are equal
7201
   */
7202 2
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings are equal, no folding needed
    if ($str1 === $str2) {
      return 0;
    }

    // otherwise compare the "natural order" folded forms
    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7206
7207
  /**
7208
   * Case-insensitive string comparison of the first n characters.
7209
   *
7210
   * @link  http://php.net/manual/en/function.strncasecmp.php
7211
   *
7212
   * @param string $str1 <p>The first string.</p>
7213
   * @param string $str2 <p>The second string.</p>
7214
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
7215
   *
7216
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
7217
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
7218
   *             <strong>0</strong> if they are equal
7219
   */
7220 1
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // case-fold both operands first, then compare their first $len characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
7224
7225
  /**
7226
   * String comparison of the first n characters.
7227
   *
7228
   * @link  http://php.net/manual/en/function.strncmp.php
7229
   *
7230
   * @param string $str1 <p>The first string.</p>
7231
   * @param string $str2 <p>The second string.</p>
7232
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
7233
   *
7234
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
7235
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
7236
   *             <strong>0</strong> if they are equal
7237
   */
7238 2
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // compare only the leading $len characters of each operand;
    // the cast turns a "false" result of substr() into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7245
7246
  /**
7247
   * Search a string for any of a set of characters.
7248
   *
7249
   * @link  http://php.net/manual/en/function.strpbrk.php
7250
   *
7251
   * @param string $haystack  <p>The string where char_list is looked for.</p>
7252
   * @param string $char_list <p>This parameter is case sensitive.</p>
7253
   *
7254
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
7255
   */
7256 1
  public static function strpbrk(string $haystack, string $char_list)
  {
    // Returns $haystack from the first character that is part of $char_list, or false.
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // build a character class from the list and look for its first hit
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the byte offset of the matched character
    $byteOffset = \strpos($haystack, $hit[0]);

    return \substr($haystack, $byteOffset);
  }
7268
7269
  /**
7270
   * Find position of first occurrence of string in a string.
7271
   *
7272
   * @link http://php.net/manual/en/function.mb-strpos.php
7273
   *
7274
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
7275
   * @param string $needle    <p>The string to find in haystack.</p>
7276
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
7277
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
7278
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7279
   *
7280
   * @return int|false <p>
7281
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
7282
   *                   If needle is not found it returns false.
7283
   *                   </p>
7284
   */
7285 180
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Finds the character position of the first occurrence of $needle in $haystack,
    // trying the available extensions one after another.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // NOTE: $needle is declared as "string", so the former "(int)$needle === $needle"
    // code point conversion could never be true and has been removed.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 8-bit charset: the byte position is the character position
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Character index (relative to the full haystack) at which the search starts.
    // A negative offset counts from the end of the haystack; the previous code reset
    // it to 0 and therefore returned a position relative to the tail only.
    if ($offset < 0) {
      $startPos = self::strlen($haystack) + $offset;
      if ($startPos < 0) {
        $startPos = 0;
      }
    } else {
      $startPos = $offset;
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTail = (string)$haystackTmp;

    // byte position inside the tail ...
    $pos = \strpos($haystackTail, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted back by the start index
    return $startPos + self::strlen(\substr($haystackTail, 0, $pos));
  }
7408
7409
  /**
7410
   * Finds the last occurrence of a character in a string within another.
7411
   *
7412
   * @link http://php.net/manual/en/function.mb-strrchr.php
7413
   *
7414
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
7415
   * @param string $needle        <p>The string to find in haystack</p>
7416
   * @param bool   $before_needle [optional] <p>
7417
   *                              Determines which portion of haystack
7418
   *                              this function returns.
7419
   *                              If set to true, it returns all of haystack
7420
   *                              from the beginning to the last occurrence of needle.
7421
   *                              If set to false, it returns all of haystack
7422
   *                              from the last occurrence of needle to the end,
7423
   *                              </p>
7424
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
7425
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
7426
   *
7427
   * @return string|false The portion of haystack or false if needle is not found.
7428
   */
7429 3
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // 'UTF-8' and 'CP850' are used as given, every other name is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the "mb_"-function does the actual work (possibly via polyfill)
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
7445
7446
  /**
7447
   * Reverses characters order in the string.
7448
   *
7449
   * @param string $str <p>The input string.</p>
7450
   *
7451
   * @return string <p>The string with characters in the reverse sequence.</p>
7452
   */
7453 9
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // walk from the last character to the first and append each one
    $result = '';
    for ($pos = self::strlen($str) - 1; $pos >= 0; --$pos) {
      $result .= self::substr($str, $pos, 1);
    }

    return $result;
  }
7467
7468
  /**
7469
   * Finds the last occurrence of a character in a string within another, case insensitive.
7470
   *
7471
   * @link http://php.net/manual/en/function.mb-strrichr.php
7472
   *
7473
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
7474
   * @param string $needle         <p>The string to find in haystack.</p>
7475
   * @param bool   $before_needle  [optional] <p>
7476
   *                               Determines which portion of haystack
7477
   *                               this function returns.
7478
   *                               If set to true, it returns all of haystack
7479
   *                               from the beginning to the last occurrence of needle.
7480
   *                               If set to false, it returns all of haystack
7481
   *                               from the last occurrence of needle to the end,
7482
   *                               </p>
7483
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
7484
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7485
   *
7486
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
7487
   */
7488 2
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // 'UTF-8' and 'CP850' are used as given, every other name is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // case-insensitive lookup of the last occurrence via the "mb_"-function
    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
7503
7504
  /**
7505
   * Find position of last occurrence of a case-insensitive string.
7506
   *
7507
   * @param string $haystack  <p>The string to look in.</p>
7508
   * @param string $needle    <p>The string to look for.</p>
7509
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
7510
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
7511
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7512
   *
7513
   * @return int|false <p>
7514
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
7515
   *                   not found, it returns false.
7516
   *                   </p>
7517
   */
7518 3
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Finds the character position of the last case-insensitive occurrence of $needle.
    //
    // NOTE: $needle is declared as "string", so the former "(int)$needle === $needle"
    // code point conversion could never be true and has been removed.
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitively
    // NOTE(review): upper-casing may change the character count for some characters,
    // which would shift the reported position - confirm against self::strtoupper()

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
7566
7567
  /**
7568
   * Find position of last occurrence of a string in a string.
7569
   *
7570
   * @link http://php.net/manual/en/function.mb-strrpos.php
7571
   *
7572
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
7573
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
7574
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
7575
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
7576
   *                              the end of the string.
7577
   *                              </p>
7578
   * @param string     $encoding  [optional] <p>Set the charset.</p>
7579
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7580
   *
7581
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
7582
   *                   is not found, it returns false.</p>
7583
   */
7584 35
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Finds the character position of the last occurrence of $needle in $haystack.

    // The default offset is "null", but "\mb_strrpos()" and "\grapheme_strrpos()"
    // expect an integer there; passing "null" on is rejected by newer PHP versions
    // in strict mode. "No offset" and "0" mean the same thing, so normalize it once.
    $offset = $offset ?? 0;

    // a non-negative integer needle is treated as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters, the result is shifted back below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // a negative offset cuts characters from the end, positions stay relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last occurrence ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
7653
7654
  /**
7655
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
7656
   * mask.
7657
   *
7658
   * @param string $str    <p>The input string.</p>
7659
   * @param string $mask   <p>The mask of chars</p>
7660
   * @param int    $offset [optional]
7661
   * @param int    $length [optional]
7662
   *
7663
   * @return int
7664
   */
7665 10
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // restrict the input to the requested window first
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the longest leading run of characters taken from the mask
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!\preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
7681
7682
  /**
7683
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
7684
   *
7685
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
7686
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
7687
   * @param bool   $before_needle  [optional] <p>
7688
   *                               If <b>TRUE</b>, strstr() returns the part of the
7689
   *                               haystack before the first occurrence of the needle (excluding the needle).
7690
   *                               </p>
7691
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
7692
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
7693
   *
7694
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
7695
   */
7696 4
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Returns the part of $haystack starting at (or, with $before_needle,
    // ending before) the first occurrence of $needle.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // "grapheme_strstr()" can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
    \preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];

    // either hand back the prefix itself or skip as many characters as it contains
    return $before_needle
        ? $prefix
        : self::substr($haystack, self::strlen($prefix));
  }
7749
7750
  /**
   * Fold the case of a string so that it can be used for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $full      [optional] <p>
   *                          <b>true</b>, additionally apply the "caseFolding_full" replacement table (default)<br>
   *                          <b>false</b>, only apply the static map [UTF8::$COMMON_CASE_FOLD]
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded (and finally lower-cased) string.</p>
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists derived from the common map, cached across calls
    static $commonSearch = null;
    static $commonReplace = null;

    // lazily loaded table: index 0 holds the search list, index 1 the replacements
    static $fullTable = null;

    if ($commonSearch === null) {
      $commonSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = (string)\str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      $folded = (string)\str_replace($fullTable[0], $fullTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
7796
7797
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to lowercase.</p>
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before any case mapping is applied
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language-specific transliterator: use the generic one
          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);

        // "transliterator_transliterate()" returns false on failure; returning that
        // would violate the declared string return type, so fall back to mbstring below
        if ($transliterated !== false) {
          return $transliterated;
        }
      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtolower($str, $encoding);
  }
7852
7853
  /**
   * Generic case sensitive transformation for collation matching:
   * the string is decomposed (NFD) and all non-spacing marks are removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>
   *                The decomposed string without non-spacing marks; the unchanged input
   *                if it can not be normalized.
   *                </p>
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // "Normalizer::normalize()" returns false on failure; passing that on would
    // end in a TypeError because of the declared string types
    if ($decomposed === false) {
      return $str;
    }

    // "preg_replace()" returns null on a PCRE error, keep the decomposed string then
    return \preg_replace('/\p{Mn}+/u', '', $decomposed) ?? $decomposed;
  }
7865
7866
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before any case mapping is applied
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the language-specific transliterator is missing
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);

        // "transliterator_transliterate()" returns false on failure; returning that
        // would violate the declared string return type, so fall back to mbstring below
        if ($transliterated !== false) {
          return $transliterated;
        }
      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtoupper($str, $encoding);
  }
7921
7922
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters to replace, or (without $to) the replacement pairs.</p>
   * @param string|string[] $to   <p>The characters used as replacement.</p>
   *
   * @return string <p>
   *                A copy of str in which every character of "from" is translated to the character at the
   *                same position in "to"; the longer of both lists is cut down to the length of the shorter one.
   *                Without "to": a string "from" is removed from str, an array "from" is handed over to
   *                "\strtr()" as replacement pairs.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    // INF marks "no $to given"
    if (INF === $to) {
      if (\is_string($from)) {
        return \str_replace($from, '', $str);
      }

      return \strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);
    $fromCount = \count($fromChars);
    $toCount = \count($toChars);

    // both lists must have the same size before they can be combined
    if ($fromCount > $toCount) {
      $fromChars = \array_slice($fromChars, 0, $toCount);
    } elseif ($fromCount < $toCount) {
      $toChars = \array_slice($toChars, 0, $fromCount);
    }

    return \strtr($str, \array_combine($fromChars, $toChars));
  }
7967
7968
  /**
   * Get the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before measuring
      $str = self::clean($str);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_"-function, provided via polyfill if needed
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
7992
7993
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $offset    <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p>
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // nothing to cut from, or nothing requested
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // The character count is only needed when an offset is involved
    // (a null $length always comes with a non-zero $offset at this point).
    $str_length = 0;
    if ($offset !== 0) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: start right behind the last character, open end
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible
    if ($offset && $offset > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::is_ascii($str)) {
      // $length is always an integer here
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
8112
8113
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, handed over to
   *                                     "UTF8::substr()".</p>
   * @param int|null $length             [optional] <p>The length of the comparison, handed over to
   *                                     "UTF8::substr()".</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $needsCut = ($offset !== 0 || $length !== null);

    if ($needsCut) {
      // cut the requested window out of the main string ...
      $left = self::substr($str1, $offset, $length);
      $str1 = ($left === false) ? '' : (string)$left;

      // ... and shorten the secondary string to the size of that window
      $right = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($right === false) ? '' : (string)$right;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
8157
8158
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. Defaults to the length of the haystack (in the given encoding).
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the same encoding that is used for cutting the haystack below
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences would distort the matching, remove them from both strings
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the regex matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
8247
8248
  /**
   * Count how often $substring occurs in the given string.
   * The comparison is case-sensitive unless $caseSensitive is set to false;
   * in that case both strings are upper-cased before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int
   */
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($caseSensitive === false) {
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    // "UTF8::substr_count()" may return false, which is reported as 0 here
    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
8269
8270
  /**
   * Strip a prefix ($needle) from the start of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    // nothing to strip
    if ('' === $needle || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
8298
8299
  /**
   * Strip a suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    // nothing to strip
    if ('' === $needle || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
8327
8328
  /**
   * Strip a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    // nothing to strip
    if ('' === $needle || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
8356
8357
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $total = \count($str);

      // the replacement: one entry per input string
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $total)
          : \array_pad([$replacement], $total, $replacement);

      // the offset: non-integer entries are replaced by 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $total);
        foreach ($offset as $key => $candidate) {
          if ((int)$candidate !== $candidate) {
            $offset[$key] = 0;
          }
        }
      } else {
        $offset = \array_pad([$offset], $total, $offset);
      }

      // the length: null entries become 0, other non-integer entries become the number of input strings
      if (null === $length) {
        $length = \array_fill(0, $total, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $total);
        foreach ($length as $key => $candidate) {
          if (null === $candidate) {
            $length[$key] = 0;
          } elseif ((int)$candidate !== $candidate) {
            $length[$key] = $total;
          }
        }
      } else {
        $length = \array_pad([$length], $total, $length);
      }

      // recursive call, element by element
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first replacement
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // plain ASCII can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the replacement in
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
8458
8459
  /**
   * Strip a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    // nothing to strip
    if ('' === $needle || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
8487
8488
  /**
   * Returns a case swapped version of the string.
   *
   * The swap is done character by character: a character that equals its lowercase form
   * is uppercased, a character that equals its uppercase form is lowercased, and anything
   * else (e.g. title-case characters or invalid bytes) is kept unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before any case mapping is applied
      $str = self::clean($str);
    }

    // A byte-wise XOR of the lower-, upper- and original string is only correct while
    // every case mapping keeps its byte length; that is not guaranteed, so the string
    // is processed per character instead.
    $chars = false;
    if ($encoding === 'UTF-8') {
      // fast path, returns false for invalid UTF-8
      $chars = \preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    }

    if ($chars === false) {
      $chars = [];
      $charCount = (int)\mb_strlen($str, $encoding);
      for ($i = 0; $i < $charCount; ++$i) {
        $chars[] = (string)\mb_substr($str, $i, 1, $encoding);
      }
    }

    $swapped = '';
    foreach ($chars as $char) {
      if (self::strtolower($char, $encoding) === $char) {
        // lowercase (or caseless) character
        $swapped .= self::strtoupper($char, $encoding);
      } elseif (self::strtoupper($char, $encoding) === $char) {
        $swapped .= self::strtolower($char, $encoding);
      } else {
        $swapped .= $char;
      }
    }

    return $swapped;
  }
8515
8516
  /**
   * Replace every tab character by a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces used for one tab. Default: 4</p>
   *
   * @return string
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
8526
8527
  /**
   * Title-case a string via "\mb_convert_case()" with MB_CASE_TITLE.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>String with all characters of $str being title-cased.</p>
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // only unknown encoding names need to be normalized
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_convert_case()" used a polyfill if needed ...
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
8545
8546
  /**
   * Deprecated alias, forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
8563
8564
  /**
   * Deprecated alias that forwards to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
8579
8580
  /**
   * Deprecated alias that forwards to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
8595
8596
  /**
   * Deprecated alias that forwards to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
8611
8612
  /**
   * Convert a string into ASCII.
   *
   * <p>Pure ASCII input is returned unchanged. Otherwise the string is cleaned and every
   * remaining non-ASCII character is replaced via lookup tables loaded with "getData()";
   * characters without a table entry are replaced by $unknown.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache of the loaded lookup tables, keyed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the cleaned string
        // in that case and fall through to the table based conversion below
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single (multi-byte) characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // the code point must be decoded for every character on its own,
      // never reuse the value of the previous character
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2 bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3 bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4 bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5 bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6 bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no branch above matched the lead byte
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing bank as empty, so it is not loaded again
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the foreach-loop
    unset($c);

    return \implode('', $chars);
  }
8773
8774
  /**
   * Interprets a value as boolean, based on its string representation.
   *
   * <p>The empty string is false; "true", "1", "on", "yes" (case-insensitive) are true;
   * "false", "0", "off", "no" are false; other numeric strings are true if greater than zero;
   * anything else is true if something is left after "UTF8::trim()".</p>
   *
   * @param mixed $str
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    // init
    $value = (string)$str;

    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $knownValues = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    $mapped = $knownValues[\strtolower($value)] ?? null;
    if ($mapped !== null) {
      return $mapped;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      return (($value + 0) > 0);
    }

    return (bool)self::trim($value);
  }
8813
8814
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      $converted = [];
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $input = (string)$str;

    return $input === '' ? '' : self::utf8_decode($input);
  }
8838
8839
  /**
   * Alias that forwards to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
8852
8853
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively)
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $length = self::strlen_in_byte($str);
    $result = '';

    $pos = 0;
    while ($pos < $length) {
      $lead = $str[$pos];

      if ($lead >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes the lead byte asks for (0 => doesn't look like UTF8)
        if ($lead <= "\xDF") {
          $expected = 1; // looks like 2 bytes UTF8
        } elseif ($lead <= "\xEF") {
          $expected = 2; // looks like 3 bytes UTF8
        } elseif ($lead <= "\xF7") {
          $expected = 3; // looks like 4 bytes UTF8
        } else {
          $expected = 0;
        }

        // collect the following bytes, as long as they are continuation bytes (0x80 - 0xBF);
        // bytes behind the end of the string are treated as "\x00"
        $sequence = $lead;
        $isUtf8 = $expected > 0;
        for ($offset = 1; $isUtf8 && $offset <= $expected; $offset++) {
          $next = $pos + $offset >= $length ? "\x00" : $str[$pos + $offset];
          if ($next >= "\x80" && $next <= "\xBF") {
            $sequence .= $next;
          } else {
            $isUtf8 = false;
          }
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $result .= $sequence;
          $pos += $expected;
        } else { // not valid UTF8 - convert it
          $result .= self::to_utf8_convert($lead);
        }

      } elseif (($lead & "\xC0") === "\x80") { // needs conversion

        $result .= self::to_utf8_convert($lead);

      } else { // it doesn't need conversion
        $result .= $lead;
      }

      $pos++;
    }

    // decode unicode escape sequences
    $decodeEscape = function ($match) {
      return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
    };
    $result = \preg_replace_callback('/\\\\u([0-9a-f]{4})/i', $decodeEscape, $result);

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
8961
8962
  /**
   * Converts a single byte, that is not part of a valid UTF-8 sequence, into UTF-8.
   *
   * <p>The byte is looked up in the Windows-1252 table first; if there is no entry,
   * it is encoded as a two-byte UTF-8 sequence.</p>
   *
   * @param string $int <p>The byte to convert. Despite its name, the callers in this class pass a
   *                    one-byte string and the value is used as a string here.</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int): string
  {
    // init
    $buf = '';

    // load the lookup tables only once
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      $buf .= self::$WIN1252_TO_UTF8[$ordC1];
    } else {
      // The division yields a float for most values: cast it explicitly, because using a float
      // with a fractional part as array key is deprecated since PHP 8.1 (the result is the same).
      $cc1 = self::$CHR[(int)($ordC1 / 64)] | "\xC0";
      $cc2 = ($int & "\x3F") | "\x80";
      $buf .= $cc1 . $cc2;
    }

    return $buf;
  }
8995
8996
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped; without them
   *                      (INF or an empty value) the characters of the unicode categories
   *                      "Z" and "C" are stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid character list and must not select the default pattern
    $useDefaultPattern = ($chars === INF || (!$chars && $chars !== '0'));

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
9025
9026
  /**
   * Makes string's first char uppercase, the rest of the string is kept as it is.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars, before the string is split into first char and rest
      $str = self::clean($str);
    }

    // everything behind the first char ("substr" may return false)
    $rest = self::substr($str, 1, null, $encoding);

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $firstCharUpper . ($rest === false ? '' : $rest);
  }
9056
9057
  /**
   * Alias that forwards all arguments to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
9072
9073
  /**
   * Uppercase for all words in the string.
   *
   * <p>Only the first char of every word is changed, all other chars are kept as they are.</p>
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare with the empty string: a plain truthiness check would also swallow the input "0"
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars, before the string is split into words
      $str = self::clean($str);
    }

    // the native function can only be used, if neither a charlist nor exceptions are given
    // (string comparison, so that e.g. the charlist "0" is not ignored)
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only, the word "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
9141
9142
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // repeat the decoding until the string doesn't change anymore (or only once, if not multi-decode)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
9190
9191
  /**
9192
   * Returns an array that maps "urlencoded" win1252 sequences to their UTF-8 characters.
9193
   *
9194
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
9195
   *
9196
   * @return string[]
9197
   */
9198 1
  public static function urldecode_fix_win1252_chars(): array
9199
  {
9200
    return [
9201 1
        '%20' => ' ',
9202
        '%21' => '!',
9203
        '%22' => '"',
9204
        '%23' => '#',
9205
        '%24' => '$',
9206
        '%25' => '%',
9207
        '%26' => '&',
9208
        '%27' => "'",
9209
        '%28' => '(',
9210
        '%29' => ')',
9211
        '%2A' => '*',
9212
        '%2B' => '+',
9213
        '%2C' => ',',
9214
        '%2D' => '-',
9215
        '%2E' => '.',
9216
        '%2F' => '/',
9217
        '%30' => '0',
9218
        '%31' => '1',
9219
        '%32' => '2',
9220
        '%33' => '3',
9221
        '%34' => '4',
9222
        '%35' => '5',
9223
        '%36' => '6',
9224
        '%37' => '7',
9225
        '%38' => '8',
9226
        '%39' => '9',
9227
        '%3A' => ':',
9228
        '%3B' => ';',
9229
        '%3C' => '<',
9230
        '%3D' => '=',
9231
        '%3E' => '>',
9232
        '%3F' => '?',
9233
        '%40' => '@',
9234
        '%41' => 'A',
9235
        '%42' => 'B',
9236
        '%43' => 'C',
9237
        '%44' => 'D',
9238
        '%45' => 'E',
9239
        '%46' => 'F',
9240
        '%47' => 'G',
9241
        '%48' => 'H',
9242
        '%49' => 'I',
9243
        '%4A' => 'J',
9244
        '%4B' => 'K',
9245
        '%4C' => 'L',
9246
        '%4D' => 'M',
9247
        '%4E' => 'N',
9248
        '%4F' => 'O',
9249
        '%50' => 'P',
9250
        '%51' => 'Q',
9251
        '%52' => 'R',
9252
        '%53' => 'S',
9253
        '%54' => 'T',
9254
        '%55' => 'U',
9255
        '%56' => 'V',
9256
        '%57' => 'W',
9257
        '%58' => 'X',
9258
        '%59' => 'Y',
9259
        '%5A' => 'Z',
9260
        '%5B' => '[',
9261
        '%5C' => '\\',
9262
        '%5D' => ']',
9263
        '%5E' => '^',
9264
        '%5F' => '_',
9265
        '%60' => '`',
9266
        '%61' => 'a',
9267
        '%62' => 'b',
9268
        '%63' => 'c',
9269
        '%64' => 'd',
9270
        '%65' => 'e',
9271
        '%66' => 'f',
9272
        '%67' => 'g',
9273
        '%68' => 'h',
9274
        '%69' => 'i',
9275
        '%6A' => 'j',
9276
        '%6B' => 'k',
9277
        '%6C' => 'l',
9278
        '%6D' => 'm',
9279
        '%6E' => 'n',
9280
        '%6F' => 'o',
9281
        '%70' => 'p',
9282
        '%71' => 'q',
9283
        '%72' => 'r',
9284
        '%73' => 's',
9285
        '%74' => 't',
9286
        '%75' => 'u',
9287
        '%76' => 'v',
9288
        '%77' => 'w',
9289
        '%78' => 'x',
9290
        '%79' => 'y',
9291
        '%7A' => 'z',
9292
        '%7B' => '{',
9293
        '%7C' => '|',
9294
        '%7D' => '}',
9295
        '%7E' => '~',
9296
        '%7F' => '',
9297
        '%80' => '`',
9298
        '%81' => '',
9299
        '%82' => '‚',
9300
        '%83' => 'ƒ',
9301
        '%84' => '„',
9302
        '%85' => '…',
9303
        '%86' => '†',
9304
        '%87' => '‡',
9305
        '%88' => 'ˆ',
9306
        '%89' => '‰',
9307
        '%8A' => 'Š',
9308
        '%8B' => '‹',
9309
        '%8C' => 'Œ',
9310
        '%8D' => '',
9311
        '%8E' => 'Ž',
9312
        '%8F' => '',
9313
        '%90' => '',
9314
        '%91' => '‘',
9315
        '%92' => '’',
9316
        '%93' => '“',
9317
        '%94' => '”',
9318
        '%95' => '•',
9319
        '%96' => '–',
9320
        '%97' => '—',
9321
        '%98' => '˜',
9322
        '%99' => '™',
9323
        '%9A' => 'š',
9324
        '%9B' => '›',
9325
        '%9C' => 'œ',
9326
        '%9D' => '',
9327
        '%9E' => 'ž',
9328
        '%9F' => 'Ÿ',
9329
        '%A0' => '',
9330
        '%A1' => '¡',
9331
        '%A2' => '¢',
9332
        '%A3' => '£',
9333
        '%A4' => '¤',
9334
        '%A5' => '¥',
9335
        '%A6' => '¦',
9336
        '%A7' => '§',
9337
        '%A8' => '¨',
9338
        '%A9' => '©',
9339
        '%AA' => 'ª',
9340
        '%AB' => '«',
9341
        '%AC' => '¬',
9342
        '%AD' => '',
9343
        '%AE' => '®',
9344
        '%AF' => '¯',
9345
        '%B0' => '°',
9346
        '%B1' => '±',
9347
        '%B2' => '²',
9348
        '%B3' => '³',
9349
        '%B4' => '´',
9350
        '%B5' => 'µ',
9351
        '%B6' => '¶',
9352
        '%B7' => '·',
9353
        '%B8' => '¸',
9354
        '%B9' => '¹',
9355
        '%BA' => 'º',
9356
        '%BB' => '»',
9357
        '%BC' => '¼',
9358
        '%BD' => '½',
9359
        '%BE' => '¾',
9360
        '%BF' => '¿',
9361
        '%C0' => 'À',
9362
        '%C1' => 'Á',
9363
        '%C2' => 'Â',
9364
        '%C3' => 'Ã',
9365
        '%C4' => 'Ä',
9366
        '%C5' => 'Å',
9367
        '%C6' => 'Æ',
9368
        '%C7' => 'Ç',
9369
        '%C8' => 'È',
9370
        '%C9' => 'É',
9371
        '%CA' => 'Ê',
9372
        '%CB' => 'Ë',
9373
        '%CC' => 'Ì',
9374
        '%CD' => 'Í',
9375
        '%CE' => 'Î',
9376
        '%CF' => 'Ï',
9377
        '%D0' => 'Ð',
9378
        '%D1' => 'Ñ',
9379
        '%D2' => 'Ò',
9380
        '%D3' => 'Ó',
9381
        '%D4' => 'Ô',
9382
        '%D5' => 'Õ',
9383
        '%D6' => 'Ö',
9384
        '%D7' => '×',
9385
        '%D8' => 'Ø',
9386
        '%D9' => 'Ù',
9387
        '%DA' => 'Ú',
9388
        '%DB' => 'Û',
9389
        '%DC' => 'Ü',
9390
        '%DD' => 'Ý',
9391
        '%DE' => 'Þ',
9392
        '%DF' => 'ß',
9393
        '%E0' => 'à',
9394
        '%E1' => 'á',
9395
        '%E2' => 'â',
9396
        '%E3' => 'ã',
9397
        '%E4' => 'ä',
9398
        '%E5' => 'å',
9399
        '%E6' => 'æ',
9400
        '%E7' => 'ç',
9401
        '%E8' => 'è',
9402
        '%E9' => 'é',
9403
        '%EA' => 'ê',
9404
        '%EB' => 'ë',
9405
        '%EC' => 'ì',
9406
        '%ED' => 'í',
9407
        '%EE' => 'î',
9408
        '%EF' => 'ï',
9409
        '%F0' => 'ð',
9410
        '%F1' => 'ñ',
9411
        '%F2' => 'ò',
9412
        '%F3' => 'ó',
9413
        '%F4' => 'ô',
9414
        '%F5' => 'õ',
9415
        '%F6' => 'ö',
9416
        '%F7' => '÷',
9417
        '%F8' => 'ø',
9418
        '%F9' => 'ù',
9419
        '%FA' => 'ú',
9420
        '%FB' => 'û',
9421
        '%FC' => 'ü',
9422
        '%FD' => 'ý',
9423
        '%FE' => 'þ',
9424
        '%FF' => 'ÿ',
9425
    ];
9426
  }
9427
9428
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * <p>
   * The string is first passed through a str_replace() built from the "win1252_to_utf8" table and is
   * then walked byte by byte: a 2-byte sequence is mapped through the "chr" table when its code point
   * is below 256, every other multi-byte sequence (and a 2-byte sequence that is cut off at the end of
   * the string) is replaced by "?".
   * </p>
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the string as it was before the byte-wise decoding is
   *                              returned whenever the decoded result is not shorter (compared via
   *                              self::strlen()).
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        // NOTE(review): static analysis reports that getData() may also return a scalar,
        // an array is assumed here - confirm against getData().
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      // NOTE(review): keys are replaced by values, i.e. the same direction that utf8_encode()
      // uses - confirm that this is intended for decoding.
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    // NOTE(review): as above, getData() is assumed to return an array for "ord" and "chr".
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position: the string is rewritten in place and
    // $j never overtakes $i because every sequence shrinks to a single byte.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if ($i + 1 >= $len) {
            // the sequence is cut off at the end of the string: do not read past the end
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one extra byte ...
          ++$i;
          // no break: ... and intentionally continue with the 3-byte handling

        case "\xE0":
          // 3-byte (or 4-byte) sequences are replaced by the placeholder
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // every other byte is copied unchanged
          $str[$j] = $str[$i];
      }
    }

    // cut off the leftover bytes behind the write position
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
9509
9510
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * The result of the native utf8_encode() is additionally passed through a str_replace() built from
   * the "win1252_to_utf8" table, but only if it contains a "\xC2" byte.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string; an empty string for empty input or if the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfill implementation may hand back false instead of a string
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte the table replacement is skipped
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are built once and re-used by later calls
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        // NOTE(review): static analysis reports that getData() may also return a scalar,
        // an array is assumed here - confirm against getData().
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
  }
9550
9551
  /**
   * Fixes utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // kept only as an alias, the work is done by the replacement method
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
9564
9565
  /**
   * Returns the table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[] <p>
   *               The whitespace characters as values, keyed by the type of whitespace
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
9581
9582
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input if nothing
   *                had to be cut, otherwise the right-trimmed leading words followed by $strAddOn.
   *                </p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // an empty input and a non-positive limit both yield an empty result
    if ($limit < 1 || '' === $str) {
      return '';
    }

    // optional leading whitespace, then between 1 and $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    // no match at all: hand the input back untouched
    if (!isset($matches[0])) {
      return $str;
    }

    // the match already covers the whole string: nothing was cut off
    if (self::strlen($str) === self::strlen($matches[0])) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
9613
9614
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * The input is split into characters via self::split() and mirrored into a one-byte-per-character
   * mask, the mask is wrapped with the native wordwrap() and the break positions found in the mask
   * are then applied to the real characters.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column; an empty string if $str or
   *                $break is empty.
   *                </p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    // $chars holds the real characters, $mask holds exactly one byte per entry of $chars:
    // "#" for an existing break, " " for a space and "?" for everything else
    $chars = [];
    $mask = '';

    foreach (\explode($break, $str) as $segmentIndex => $segment) {

      // every segment but the first one was preceded by a break in the input
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go, on the mask only
    $wrappedMask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($wrappedMask, '#', $breakPos + 1))) {

      // copy (and release) all characters in front of the current break position
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // an original break or a space at this position is consumed by the new break
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left behind the last break is appended unchanged
    return $wrapped . \implode('', $chars);
  }
9679
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * <p>
   * The input is split at "\r\n", "\r" and "\n"; every resulting line is wrapped on its own with
   * "UTF8::wordwrap()" and is terminated by "\n" (this includes the last line).
   * </p>
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width that is passed to "UTF8::wordwrap()".</p>
   *
   * @return string
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $strings = \preg_split('/\\r\\n|\\r|\\n/', $str);

    // preg_split() returns false on failure: fall back to treating the input as a single line
    if ($strings === false) {
      $strings = [$str];
    }

    $string = '';
    foreach ($strings as $value) {
      // use the multi-byte aware wrapper of this class: an unqualified wordwrap() call would
      // resolve to the native function, which counts bytes instead of characters
      $string .= self::wordwrap($value, $limit);
      $string .= "\n";
    }

    return $string;
  }
9699
9700
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return string[] <p>The White Space characters as values, keyed by their numeric code point.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
9709
9710
}
9711