Passed
Push — master ( 27fda7...7a2ada )
by Lars
03:10
created

UTF8::str_titleize_for_humans()   B

Complexity

Conditions 5
Paths 2

Size

Total Lines 127
Code Lines 75

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 50
CRAP Score 5.0014

Importance

Changes 0
Metric Value
cc 5
eloc 75
nc 2
nop 3
dl 0
loc 127
ccs 50
cts 52
cp 0.9615
crap 5.0014
rs 8.2343
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
   * Bom => Byte-Length
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * The "as WINDOWS-1252" entries are the same marks after their bytes were
   * mis-read as WINDOWS-1252 and stored as UTF-8 again.
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // Written with byte escapes on purpose: the literal characters get lost when the
      // file is re-encoded, which leaves an empty key. These are the six bytes
      // of the three BOM bytes read as WINDOWS-1252 and encoded as UTF-8.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Constructor.
   *
   * Creating an instance only triggers the environment detection done by
   * UTF8::checkForSupport().
   */
  public function __construct()
  {
    // populate self::$SUPPORT (a no-op when the detection has already run)
    self::checkForSupport();
  }
212
213
  /**
   * Fetch one character by position, similar to $str[1] but multi-byte aware.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single multi-byte character, or an empty string for an empty
   *                input or a negative position.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // nothing can be looked up in an empty string or at a negative position
    if ($str === '' || $pos < 0) {
      return '';
    }

    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233
234
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is
   *       returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    // a BOM was detected: hand the input back untouched
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The work is delegated to UTF8::str_pad(): the target length is the current
   * length plus $left plus $right, and the pad type is chosen from which of the
   * two amounts is non-zero.
   *
   * NOTE(review): when both amounts are non-zero only their sum is passed on
   * (with STR_PAD_BOTH); how that total is split between the two sides is up to
   * UTF8::str_pad() - confirm that unequal $left / $right behave as intended.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with padding applied.</p>
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has a default: a required parameter after optional ones is
    // deprecated since PHP 8.0 and contradicted the "[optional]" documentation.

    // the unused side is 0, so one sum covers every combination
    $length = $left + $right + self::strlen($str, $encoding);

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or neither (then $length equals the current length)
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
284
285
  /**
   * Changes the case of all keys in an array.
   *
   * Values are copied as they are; if two keys become equal after the case
   * change, the later entry overwrites the earlier one.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value
   *                     is treated as CASE_LOWER.</p>
   *
   * @return array <p>An array with its keys lower or uppercased.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything that is not CASE_UPPER falls back to lower-casing
    $toUpper = ($case === CASE_UPPER);

    $return = [];
    foreach ($array as $key => $value) {
      // Array keys can be integers. This file declares strict_types, so an int
      // would be rejected by a string-typed helper (assumes strtolower() /
      // strtoupper() declare string parameters - confirm). Cast first; PHP turns
      // purely numeric string keys back into integers on assignment.
      $key = (string)$key;

      $key = $toUpper ? self::strtoupper($key) : self::strtolower($key);

      $return[$key] = $value;
    }

    return $return;
  }
317
318
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * An empty string is returned when $start is not found, when $end is not
   * found after $start, or when $end follows $start immediately.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $posStart = self::strpos($str, $start, $offset, $encoding);
    if ($posStart === false) {
      return '';
    }

    // first character after the start delimiter
    $substrIndex = $posStart + self::strlen($start, $encoding);

    $posEnd = self::strpos($str, $end, $substrIndex, $encoding);
    if ($posEnd === false || $posEnd === $substrIndex) {
      return '';
    }

    // substr() may return false; cast so the declared string return type holds
    // under strict_types (same approach as access() and char_at())
    return (string)self::substr($str, $substrIndex, $posEnd - $substrIndex, $encoding);
  }
350
351
  /**
   * Convert a binary representation ("0" / "1" digits) into a string.
   *
   * The digits are converted to hexadecimal with base_convert() and the hex
   * digits are then packed into bytes.
   *
   * NOTE(review): base_convert() works on native numbers, so very long inputs may
   * lose precision and leading all-zero bytes are not preserved.
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>The packed bytes, or an empty string when the input has no
   *                first offset or converts to zero.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $convert = \base_convert($bin, 2, 16);
    if ($convert === '0') {
      return '';
    }

    // pack('H*') reads two hex digits per byte and fills a missing digit at the
    // END ("1" would become "\x10"), so align odd-length values on the left.
    if (\strlen($convert) % 2 !== 0) {
      $convert = '0' . $convert;
    }

    return \pack('H*', $convert);
  }
371
372
  /**
   * Returns the UTF-8 Byte Order Mark Character.
   *
   * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark (the three bytes EF BB BF)
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
383
384
  /**
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback, passed through to UTF8::chr_map().</p>
   * @param string       $str      <p>The string, passed through to UTF8::chr_map().</p>
   *
   * @return string[] <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
398
399
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The character at $index (the result of UTF8::substr() cast to string).</p>
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // a one-character slice starting at $index
    $character = self::substr($str, $index, 1, $encoding);

    return (string)$character;
  }
412
413
  /**
   * Returns an array consisting of the characters in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] <p>An array of chars (UTF8::str_split() with a chunk length of 1).</p>
   */
  public static function chars(string $str): array
  {
    $characters = self::str_split($str, 1);

    return $characters;
  }
424
425
  /**
   * This method will auto-detect your server environment for UTF-8 support.
   *
   * The results are stored in self::$SUPPORT; once the
   * "already_checked_via_portable_utf8" flag is set the method returns immediately.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // detection already ran: keep the stored results
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator ids are only listed when intl provides the function,
    // otherwise an empty list is stored
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    /** @noinspection PhpComposerExtensionStubsInspection */
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? \transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
471
472
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per code point and encoding. Code points up to 127 are
   * read from the "chr" lookup table; larger ones are produced by IntlChar when
   * it is available, otherwise the UTF-8 bytes are assembled from the same table.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input
   *                     (non-numeric input, or a code point outside 0 - 0x10FFFF).</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Validate before the value is used as an index into the lookup table:
    // empty / non-numeric input and code points outside the Unicode range
    // previously ended up as undefined offsets.
    if (\is_int($code_point) === false) {
      if (\is_numeric($code_point) === false) {
        return null;
      }

      $code_point = (int)$code_point;
    }

    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);

      // IntlChar signals failure with null; do not convert or cache that
      if ($chr === null) {
        return null;
      }

    } else {

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      // assemble the UTF-8 byte sequence by hand: one lead byte followed by
      // continuation bytes that carry 6 bits each
      if ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }

    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
564
565
  /**
   * Applies callback to all characters of a string.
   *
   * The string is split with UTF8::split() and every resulting element is passed
   * to the callback via array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
579
580
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] <p>An array of byte lengths of each character (empty for an empty string).</p>
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    // self::$SUPPORT is filled lazily; make sure the flag read below exists
    // (same guard as in chr() and encode())
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // with mbstring function overloading the plain strlen() is not used;
      // the length is taken via UTF8::strlen() with the 8-bit "CP850" encoding
      return \array_map(
          function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
611
612
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read (1 - 4); the payload bits of
   * the lead byte and of the following bytes are combined into one integer.
   * Only the first character of $char is looked at.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code, or 0 for an empty string.</p>
   */
  public static function chr_to_decimal(string $char): int
  {
    // there is no first byte to inspect in an empty string
    if ('' === $char) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // Find the sequence length from the lead byte and strip its marker bits.
    // A lead byte matching none of the patterns keeps $bytes at 1, so its
    // value is returned unchanged.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: shift the collected bits and append this byte without its high bit
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
650
651
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed to UTF8::int_to_hex(). Default: 'U+'</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the "&#0;" entity is handled like an empty character
    $subject = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($subject);

    return self::int_to_hex($codePoint, $pfix);
  }
671
672
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character, passed through unchanged.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns.</p>
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
685
686
  /**
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
   *
   * The chunks come from UTF8::split() and are joined with $end, so $end is
   * placed between chunks only.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
699
700
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The optional steps run in this order: diamond question mark, invisible
   * characters, whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed byte sequences, groups 2 and 3 match
    // stray bytes; replacing every match with group 1 drops the stray bytes.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
768
769
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * Runs UTF8::fix_simple_utf8() first and passes the result to UTF8::clean()
   * with a fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() removes all non UTF-8 symbols; the flags select the extra steps
    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (no MS Word normalization)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
802
803
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is split with UTF8::split() first; an array is used as given.
   * Every element is then mapped through UTF8::ord().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return int[] <p>The array of code points (with $u_style the elements are the
   *               results of UTF8::int_to_hex() instead).</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $characters = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $characters);

    if (!$u_style) {
      return $codePoints;
    }

    // convert every code point to its hex notation
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
840
841
  /**
   * Trims the string and replaces consecutive whitespace characters with a
   * single space. This includes tabs and newline characters, as well as
   * multibyte whitespace such as the thin space and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String with a trimmed $str and condensed whitespace.</p>
   */
  public static function collapse_whitespace(string $str): string
  {
    // condense every whitespace run first, then trim the result
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
856
857
  /**
   * Returns count of characters used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // one array element per character, then tally identical elements
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
870
871
  /**
   * Remove css media-queries.
   *
   * Every "@media ... { ... }" block matched by the pattern below is replaced
   * with an empty string; text outside of such blocks is kept as it is.
   *
   * @param string $str <p>The CSS source.</p>
   *
   * @return string <p>The input without the matched media-query blocks.</p>
   */
  public static function css_stripe_media_queries(string $str): string
  {
    // flags: m = multi-line, i = case-insensitive, s = dot matches newlines,
    //        U = quantifiers are ungreedy by default
    return (string)\preg_replace(
        '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
        '',
        $str
    );
  }
886
887
  /**
   * Tells whether the "ctype" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function ctype_loaded(): bool
  {
    $ctypeAvailable = \extension_loaded('ctype');

    return $ctypeAvailable;
  }
896
897
  /**
   * Converts an int-value (a decimal code point) into an UTF-8 character.
   *
   * The value is wrapped into a numeric html-entity ("&#<int>;") which is then
   * decoded via "UTF8::html_entity_decode()".
   *
   * @param mixed $int <p>The decimal value, it is concatenated into the entity as-is.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $numericEntity = '&#' . $int . ';';

    return self::html_entity_decode($numericEntity, ENT_QUOTES | ENT_HTML5);
  }
908
909
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * The input is returned unchanged if $str or $encoding is empty, if no conversion is needed
   * (not forced and the detected encoding is unknown or already the target) or if
   * "\mb_convert_encoding()" yields a falsy result.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if ('' === $str || '' === $encoding) {
      return $str;
    }

    // the two most common targets are used verbatim, everything else gets normalized first
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // without $force we only convert when a different source encoding was detected
    $conversionNeeded = $force === true
                        || ($encodingDetected !== false && $encodingDetected !== $encoding);
    if ($conversionNeeded === false) {
      return $str;
    }

    // forced, or the source is one of the encodings the dedicated helpers below are used for
    $helperCanHandleSource = $force === true
                             || \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($encoding === 'UTF-8' && $helperCanHandleSource) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $helperCanHandleSource) {
      return self::to_iso8859($str);
    }

    // warn (but still try) if a target other than the three special-cased ones is requested without mbstring
    $isOtherTarget = !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if ($isOtherTarget && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // NOTE(review): with $force the source encoding handed to mb_convert_encoding() is the
    //               target encoding itself - confirm that this is intended.
    $sourceEncoding = ($force === true ? $encoding : $encodingDetected);
    $strEncoded = \mb_convert_encoding($str, $encoding, $sourceEncoding);

    // a falsy conversion result falls back to the untouched input
    return $strEncoded ? $strEncoded : $str;
  }
1002
1003
  /**
   * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
   *
   * Without a search-string the extract starts at the beginning of $str and is cut at a space / dot
   * found at or after "$length - 1". The cut positions are taken from "\min()" / "\max()" over the
   * space- and dot-positions, so a "not found" result (false) of one lookup can win that comparison,
   * in which case the text is not cut at that side.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // chars that are stripped from the cut edges of the extract
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    if (empty($search)) {

      // no search-string: cut the beginning of the text at the next space / dot

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      $pos = \min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        // the cast keeps "\rtrim()" away from a false substr-result (strict_types)
        return \rtrim(
                   (string)self::substr($str, 0, $pos, $encoding),
                   $trimChars
               ) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos(
        $str,
        $search,
        0,
        $encoding
    );
    // position at which the extract should begin so that the match ends up in the middle
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    if ($halfSide > 0) {

      // move the start back to the last space / dot in front of the calculated position
      $halfText = (string)self::substr($str, 0, $halfSide, $encoding);
      $pos_start = \max(
          self::strrpos($halfText, ' ', 0, $encoding),
          self::strrpos($halfText, '.', 0, $encoding)
      );

      if (!$pos_start) {
        $pos_start = 0;
      }

    } else {
      $pos_start = 0;
    }

    if ($wordPos && $halfSide > 0) {

      // the extract starts somewhere inside of the text
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = \min(
                     self::strpos($str, ' ', $l, $encoding),
                     self::strpos($str, '.', $l, $encoding)
                 ) - $pos_start;

      if (!$pos_end || $pos_end <= 0) {
        // no usable end found: take everything from the start position to the end of the text,
        // the length is measured in $encoding like everywhere else in this method
        $extract = $replacerForSkippedText . \ltrim(
                (string)self::substr(
                    $str,
                    $pos_start,
                    self::strlen($str, $encoding),
                    $encoding
                ),
                $trimChars
            );
      } else {
        $extract = $replacerForSkippedText . \trim(
                (string)self::substr(
                    $str,
                    $pos_start,
                    $pos_end,
                    $encoding
                ),
                $trimChars
            ) . $replacerForSkippedText;
      }

    } else {

      // the extract starts at the beginning of the text
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $extract = \rtrim(
                       (string)self::substr($str, 0, $pos_end, $encoding),
                       $trimChars
                   ) . $replacerForSkippedText;
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1136
1137
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read, it is passed through
   *                                        "\filter_var(..., FILTER_SANITIZE_STRING)" before it is used.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Forwarded to "\file_get_contents()" to trigger the
   *                                        include path search.
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If it is null and $timeout is set,
   *                                        a context with that "http" timeout is created.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts, null is treated as 0.
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true
  )
  {
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the supported PHP versions.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // only build a context on our own if the caller did not provide one
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $offset = $offset ?? 0;

    // the length is only passed on if the caller has set one
    $data = $maxLength !== null
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    $shouldConvert = self::is_binary($data, true) !== true
                     || self::is_utf16($data) !== false
                     || self::is_utf32($data) !== false;

    if ($shouldConvert) {
      $data = self::cleanup(
          self::encode('UTF-8', $data, false)
      );
    }

    return $data;
  }
1228
1229
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also if the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $fileContent = \file_get_contents($file_path);

    // an unreadable file cannot have a BOM, and false must not be handed on as if it were a string
    if ($fileContent === false) {
      return false;
    }

    return self::string_has_bom($fileContent);
  }
1240
1241
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (value by value / property by property), strings
   * are normalized, every other type is returned untouched. For strings "\r\n" and "\r" are
   * replaced by "\n" first; non-ASCII strings are then normalized via "\Normalizer" and fall back to
   * "UTF8::encode('UTF-8', ...)" if the normalization did not produce a non-empty string.
   *
   * @param mixed  $var
   * @param int    $normalization_form <p>Passed to "\Normalizer", the default 4 is NFC.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // pure ASCII needs no normalization at all
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // any non-empty marker, so that the "isset($n[0], ...)" check below passes
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // an unusable normalizer-result means: try to repair the encoding instead
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1304
1305
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name, optionally filters it and passes the
   * result through "UTF8::filter()".
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              It is only forwarded if it was passed explicitly.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // the argument count decides whether $options is forwarded to the native function
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1345
1346
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables, optionally filters them and passes the
   * result through "UTF8::filter()".
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the
   *                          filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // $definition and $add_empty are only forwarded if at least two arguments were passed
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1393
1394
  /**
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Filters a variable with a specified filter and passes the result through "UTF8::filter()".
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>
   *                        Value to filter.
   *                        </p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        It is only forwarded if it was passed explicitly.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        // other options here
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filter that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // for filter that only accept flags, you can also pass as an array
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
   *                        // callback validate filter
   *                        function foo($value)
   *                        {
   *                        // Expected format: Surname, GivenNames
   *                        if (strpos($value, ", ") === false) return false;
   *                        list($surname, $givennames) = explode(", ", $value, 2);
   *                        $empty = (empty($surname) || empty($givennames));
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
   *                        if ($empty || $notstrings) {
   *                        return false;
   *                        } else {
   *                        return $value;
   *                        }
   *                        }
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // the argument count decides whether $options is forwarded to the native function
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1463
1464
  /**
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets multiple variables, optionally filters them and passes the
   * result through "UTF8::filter()".
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an
   *                          array optionally specifying the filter, flags and options.
   *                          If the value is an array, valid keys are filter
   *                          which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set.
   * @since 5.2.0
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // $definition and $add_empty are only forwarded if at least two arguments were passed
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1508
1509
  /**
   * Tells whether the "finfo" class exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function finfo_loaded(): bool
  {
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1518
1519
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The leading characters, or an empty string if $n is not positive
   *                or if "UTF8::substr()" did not return a string.</p>
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // "UTF8::substr()" may return false, which would violate the declared return type (strict_types)
    return (string)self::substr($str, 0, $n, $encoding);
  }
1536
1537
  /**
   * Checks that the number of unicode characters does not exceed the given size.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The size in number of chars to be checked against string.
   *
   * @return bool true if the string length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1549
1550
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The fix is a plain "\str_replace()" with the keys / values of the "utf8_fix" data-map; the map
   * is loaded once and its keys / values are cached for later calls.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // search- and replace-lists, built on the first call with a non-empty string
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // load the map only if no other method has done that already
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1584
1585
  /**
   * Repair a string that was UTF-8 encoded twice (or more often).
   *
   * The string is decoded and re-encoded until a pass no longer changes it.
   *
   * @param string|string[] $str <p>A single string or an array of strings.</p>
   *
   * @return string|string[] <p>The fixed string, or the input array with every
   *                         element fixed (keys are kept).</p>
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str)) {
      // handle every element on its own, nested arrays included
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1613
1614
  /**
   * Determine the text direction of a single character.
   *
   * Uses "IntlChar" when the support info reports it as available and it yields a
   * known direction code; otherwise the code point is looked up in the tables below.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }

      // every other code falls through to the table lookup
    }

    $c = static::chr_to_decimal($char);

    // outside of the range covered by the tables
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      $rtlSingles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $rtlRanges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlSingles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $rtlRanges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two tables and not U+200F
      return 'LTR';

    }

    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as list($rangeStart, $rangeEnd)) {
      if ($rangeStart <= $c && $rangeEnd >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1736
1737
  /**
   * Load a data file from "/data/*.php" (relative to the directory of this file).
   *
   * @param string $file <p>Name of the data file, without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Whatever the required file returns; false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1754
1755
  /**
   * Report the detected php-support.
   *
   * @param string|null $key <p>Name of a single support entry, or null for all entries.</p>
   *
   * @return mixed <p>The full support-"array", if $key === null<br>
   *               the stored value, if $key is used and available<br>
   *               otherwise null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown keys (and keys holding null) result in null
    return self::$SUPPORT[$key] ?? null;
  }
1780
1781
  /**
   * Build a random string by picking characters from $possibleChars.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The random string; an empty string if $possibleChars has a length of 0.</p>
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $maxlength = self::strlen($possibleChars, $encoding);
    if ($maxlength === 0) {
      return '';
    }

    $randomString = '';

    // one randomly chosen char per iteration
    for ($position = 0; $position < $length; $position++) {
      try {
        $pickedIndex = \random_int(0, $maxlength - 1);
      } catch (\Exception $e) {
        // random_int() could not be used, fall back to mt_rand()
        /** @noinspection RandomApiMigrationInspection */
        $pickedIndex = \mt_rand(0, $maxlength - 1);
      }

      $randomString .= self::substr($possibleChars, $pickedIndex, 1, $encoding);
    }

    return $randomString;
  }
1814
1815
  /**
   * Create a unique identifier via "uniqid()", seeded with a random number, the session-id,
   * the remote / server address (if set) and the given extra entropy.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // collect the seed piece by piece
    $seed = (string)\mt_rand();
    $seed .= \session_id();
    $seed .= ($_SERVER['REMOTE_ADDR'] ?? '');
    $seed .= ($_SERVER['SERVER_ADDR'] ?? '');
    $seed .= $entropyExtra;

    $identifier = \uniqid($seed, true);

    if (!$md5) {
      return $identifier;
    }

    return \md5($identifier . $seed);
  }
1837
1838
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool <p>The result of "UTF8::string_has_bom()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    // pure delegation, kept for backward compatibility
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1853
1854
  /**
   * Check whether the string contains at least one lower case char.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains a lower case character.</p>
   */
  public static function has_lowercase(string $str): bool
  {
    // anything, followed by one char of the "lower" class
    $lowercasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowercasePattern);
  }
1865
1866
  /**
   * Check whether the string contains at least one upper case char.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains an upper case character.</p>
   */
  public static function has_uppercase(string $str): bool
  {
    // anything, followed by one char of the "upper" class
    $uppercasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $uppercasePattern);
  }
1877
1878
  /**
   * Convert a hexadecimal value into an UTF-8 character.
   *
   * The value is converted via "hexdec()" and then handed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $decimalValue = \hexdec($hexdec);

    return self::decimal_to_chr($decimalValue);
  }
1889
1890
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted (case-insensitive) forms: "\uXXXX", "U+XXXX" or plain "XXXX",
   * each with 4 to 6 hexadecimal digits.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if ('' === $hexDec) {
      return false;
    }

    // only real hex digits are allowed: with "[a-z0-9]" an input like "zzzz" matched
    // and intval() silently turned it into 0 instead of reporting a failure
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1911
1912
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string   $str
   * @param int|null $flags
   * @param string   $encoding
   *
   * @return string <p>The result of "UTF8::html_entity_decode()" for the given arguments.</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // pure delegation, all arguments are passed through unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1927
1928
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded; 0x80 leaves the ASCII range untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // a convmap entry consists of exactly four values: [start, end, offset, mask];
      // an element count that is not a multiple of four is rejected by PHP >= 8.0
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without "mb_encode_numericentity()": encode char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1974
1975
  /**
1976
   * UTF-8 version of html_entity_decode()
1977
   *
1978
   * The reason we are not using html_entity_decode() by itself is because
1979
   * while it is not technically correct to leave out the semicolon
1980
   * at the end of an entity most browsers will still interpret the entity
1981
   * correctly. html_entity_decode() does not convert entities without
1982
   * semicolons, so we are left with our own little solution here. Bummer.
1983
   *
1984
   * Convert all HTML entities to their applicable characters
1985
   *
1986
   * INFO: opposite to UTF8::html_encode()
1987
   *
1988
   * @link http://php.net/manual/en/function.html-entity-decode.php
1989
   *
1990
   * @param string $str      <p>
1991
   *                         The input string.
1992
   *                         </p>
1993
   * @param int    $flags    [optional] <p>
1994
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1995
   *                         which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used.
1996
   *                         <table>
1997
   *                         Available <i>flags</i> constants
1998
   *                         <tr valign="top">
1999
   *                         <td>Constant Name</td>
2000
   *                         <td>Description</td>
2001
   *                         </tr>
2002
   *                         <tr valign="top">
2003
   *                         <td><b>ENT_COMPAT</b></td>
2004
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2005
   *                         </tr>
2006
   *                         <tr valign="top">
2007
   *                         <td><b>ENT_QUOTES</b></td>
2008
   *                         <td>Will convert both double and single quotes.</td>
2009
   *                         </tr>
2010
   *                         <tr valign="top">
2011
   *                         <td><b>ENT_NOQUOTES</b></td>
2012
   *                         <td>Will leave both double and single quotes unconverted.</td>
2013
   *                         </tr>
2014
   *                         <tr valign="top">
2015
   *                         <td><b>ENT_HTML401</b></td>
2016
   *                         <td>
2017
   *                         Handle code as HTML 4.01.
2018
   *                         </td>
2019
   *                         </tr>
2020
   *                         <tr valign="top">
2021
   *                         <td><b>ENT_XML1</b></td>
2022
   *                         <td>
2023
   *                         Handle code as XML 1.
2024
   *                         </td>
2025
   *                         </tr>
2026
   *                         <tr valign="top">
2027
   *                         <td><b>ENT_XHTML</b></td>
2028
   *                         <td>
2029
   *                         Handle code as XHTML.
2030
   *                         </td>
2031
   *                         </tr>
2032
   *                         <tr valign="top">
2033
   *                         <td><b>ENT_HTML5</b></td>
2034
   *                         <td>
2035
   *                         Handle code as HTML 5.
2036
   *                         </td>
2037
   *                         </tr>
2038
   *                         </table>
2039
   *                         </p>
2040
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
2041
   *
2042
   * @return string <p>The decoded string.</p>
2043
   */
2044 22
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // nothing to decode without "&", or without both "&#" and ";"
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // make sure the support info is initialized before it is read
      // (same guard as in the other methods that read self::$SUPPORT)
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // repeat until a pass does not change the string anymore (handles multi-encoded entities)
    do {
      $str_compare = $str;

      // decimal entities with a trailing semicolon; entities that decode to a
      // single / double quote are kept as they are in this step
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the semicolon is appended where it is missing; preg_replace() returns null
      // on error, so the result is cast before it reaches the strictly typed native function)
      $str = \html_entity_decode(
          (string)\preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2114
2115
  /**
   * Escape a string for html via "UTF8::htmlspecialchars()",
   * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2131
2132
  /**
   * Strip html-tags that have no content (or only whitespace) between the opening and the closing tag.
   *
   * e.g.: <tag></tag>
   *
   * @param string $str
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    // opening tag, optional whitespace, closing tag
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $withoutEmptyTags = \preg_replace($emptyTagPattern, '', $str);

    return (string)$withoutEmptyTags;
  }
2149
2150
  /**
2151
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2152
   *
2153
   * @link http://php.net/manual/en/function.htmlentities.php
2154
   *
2155
   * @param string $str           <p>
2156
   *                              The input string.
2157
   *                              </p>
2158
   * @param int    $flags         [optional] <p>
2159
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2160
   *                              invalid code unit sequences and the used document type. The default is
2161
   *                              ENT_COMPAT | ENT_HTML401.
2162
   *                              <table>
2163
   *                              Available <i>flags</i> constants
2164
   *                              <tr valign="top">
2165
   *                              <td>Constant Name</td>
2166
   *                              <td>Description</td>
2167
   *                              </tr>
2168
   *                              <tr valign="top">
2169
   *                              <td><b>ENT_COMPAT</b></td>
2170
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2171
   *                              </tr>
2172
   *                              <tr valign="top">
2173
   *                              <td><b>ENT_QUOTES</b></td>
2174
   *                              <td>Will convert both double and single quotes.</td>
2175
   *                              </tr>
2176
   *                              <tr valign="top">
2177
   *                              <td><b>ENT_NOQUOTES</b></td>
2178
   *                              <td>Will leave both double and single quotes unconverted.</td>
2179
   *                              </tr>
2180
   *                              <tr valign="top">
2181
   *                              <td><b>ENT_IGNORE</b></td>
2182
   *                              <td>
2183
   *                              Silently discard invalid code unit sequences instead of returning
2184
   *                              an empty string. Using this flag is discouraged as it
2185
   *                              may have security implications.
2186
   *                              </td>
2187
   *                              </tr>
2188
   *                              <tr valign="top">
2189
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2190
   *                              <td>
2191
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2192
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2193
   *                              </td>
2194
   *                              </tr>
2195
   *                              <tr valign="top">
2196
   *                              <td><b>ENT_DISALLOWED</b></td>
2197
   *                              <td>
2198
   *                              Replace invalid code points for the given document type with a
2199
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2200
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2201
   *                              instance, to ensure the well-formedness of XML documents with
2202
   *                              embedded external content.
2203
   *                              </td>
2204
   *                              </tr>
2205
   *                              <tr valign="top">
2206
   *                              <td><b>ENT_HTML401</b></td>
2207
   *                              <td>
2208
   *                              Handle code as HTML 4.01.
2209
   *                              </td>
2210
   *                              </tr>
2211
   *                              <tr valign="top">
2212
   *                              <td><b>ENT_XML1</b></td>
2213
   *                              <td>
2214
   *                              Handle code as XML 1.
2215
   *                              </td>
2216
   *                              </tr>
2217
   *                              <tr valign="top">
2218
   *                              <td><b>ENT_XHTML</b></td>
2219
   *                              <td>
2220
   *                              Handle code as XHTML.
2221
   *                              </td>
2222
   *                              </tr>
2223
   *                              <tr valign="top">
2224
   *                              <td><b>ENT_HTML5</b></td>
2225
   *                              <td>
2226
   *                              Handle code as HTML 5.
2227
   *                              </td>
2228
   *                              </tr>
2229
   *                              </table>
2230
   *                              </p>
2231
   * @param string $encoding      [optional] <p>
2232
   *                              Like <b>htmlspecialchars</b>,
2233
   *                              <b>htmlentities</b> takes an optional third argument
2234
   *                              <i>encoding</i> which defines encoding used in
2235
   *                              conversion.
2236
   *                              Although this argument is technically optional, you are highly
2237
   *                              encouraged to specify the correct value for your code.
2238
   *                              </p>
2239
   * @param bool   $double_encode [optional] <p>
2240
   *                              When <i>double_encode</i> is turned off PHP will not
2241
   *                              encode existing html entities. The default is to convert everything.
2242
   *                              </p>
2243
   *
2244
   *
2245
   * @return string the encoded string.
2246
   * </p>
2247
   * <p>
2248
   * If the input <i>string</i> contains an invalid code unit
2249
   * sequence within the given <i>encoding</i> an empty string
2250
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2251
   * <b>ENT_SUBSTITUTE</b> flags are set.
2252
   */
2253 7
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $needsNormalization = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * The native htmlentities() leaves backslashes untouched, so they are
     * replaced by their html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // finally convert the remaining non-ASCII chars into numbered entities
    return self::html_encode($encoded, true, $encoding);
  }
2273
2274
  /**
2275
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2276
   *
2277
   * INFO: Take a look at "UTF8::htmlentities()"
2278
   *
2279
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2280
   *
2281
   * @param string $str           <p>
2282
   *                              The string being converted.
2283
   *                              </p>
2284
   * @param int    $flags         [optional] <p>
2285
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2286
   *                              invalid code unit sequences and the used document type. The default is
2287
   *                              ENT_COMPAT | ENT_HTML401.
2288
   *                              <table>
2289
   *                              Available <i>flags</i> constants
2290
   *                              <tr valign="top">
2291
   *                              <td>Constant Name</td>
2292
   *                              <td>Description</td>
2293
   *                              </tr>
2294
   *                              <tr valign="top">
2295
   *                              <td><b>ENT_COMPAT</b></td>
2296
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2297
   *                              </tr>
2298
   *                              <tr valign="top">
2299
   *                              <td><b>ENT_QUOTES</b></td>
2300
   *                              <td>Will convert both double and single quotes.</td>
2301
   *                              </tr>
2302
   *                              <tr valign="top">
2303
   *                              <td><b>ENT_NOQUOTES</b></td>
2304
   *                              <td>Will leave both double and single quotes unconverted.</td>
2305
   *                              </tr>
2306
   *                              <tr valign="top">
2307
   *                              <td><b>ENT_IGNORE</b></td>
2308
   *                              <td>
2309
   *                              Silently discard invalid code unit sequences instead of returning
2310
   *                              an empty string. Using this flag is discouraged as it
2311
   *                              may have security implications.
2312
   *                              </td>
2313
   *                              </tr>
2314
   *                              <tr valign="top">
2315
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2316
   *                              <td>
2317
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2318
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2319
   *                              </td>
2320
   *                              </tr>
2321
   *                              <tr valign="top">
2322
   *                              <td><b>ENT_DISALLOWED</b></td>
2323
   *                              <td>
2324
   *                              Replace invalid code points for the given document type with a
2325
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2326
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2327
   *                              instance, to ensure the well-formedness of XML documents with
2328
   *                              embedded external content.
2329
   *                              </td>
2330
   *                              </tr>
2331
   *                              <tr valign="top">
2332
   *                              <td><b>ENT_HTML401</b></td>
2333
   *                              <td>
2334
   *                              Handle code as HTML 4.01.
2335
   *                              </td>
2336
   *                              </tr>
2337
   *                              <tr valign="top">
2338
   *                              <td><b>ENT_XML1</b></td>
2339
   *                              <td>
2340
   *                              Handle code as XML 1.
2341
   *                              </td>
2342
   *                              </tr>
2343
   *                              <tr valign="top">
2344
   *                              <td><b>ENT_XHTML</b></td>
2345
   *                              <td>
2346
   *                              Handle code as XHTML.
2347
   *                              </td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_HTML5</b></td>
2351
   *                              <td>
2352
   *                              Handle code as HTML 5.
2353
   *                              </td>
2354
   *                              </tr>
2355
   *                              </table>
2356
   *                              </p>
2357
   * @param string $encoding      [optional] <p>
2358
   *                              Defines encoding used in conversion.
2359
   *                              </p>
2360
   *                              <p>
2361
   *                              For the purposes of this function, the encodings
2362
   *                              ISO-8859-1, ISO-8859-15,
2363
   *                              UTF-8, cp866,
2364
   *                              cp1251, cp1252, and
2365
   *                              KOI8-R are effectively equivalent, provided the
2366
   *                              <i>string</i> itself is valid for the encoding, as
2367
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2368
   *                              the same positions in all of these encodings.
2369
   *                              </p>
2370
   * @param bool   $double_encode [optional] <p>
2371
   *                              When <i>double_encode</i> is turned off PHP will not
2372
   *                              encode existing html entities, the default is to convert everything.
2373
   *                              </p>
2374
   *
2375
   * @return string The converted string.
2376
   * </p>
2377
   * <p>
2378
   * If the input <i>string</i> contains an invalid code unit
2379
   * sequence within the given <i>encoding</i> an empty string
2380
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2381
   * <b>ENT_SUBSTITUTE</b> flags are set.
2382
   */
2383 7
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are handed to PHP verbatim; every other label is
    // first run through normalize_encoding() (second argument 'UTF-8' —
    // presumably the fallback, confirm against normalize_encoding()).
    $passThrough = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$passThrough) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Delegate the actual escaping to the native function.
    $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $escaped;
  }
2391
2392
  /**
2393
   * Checks whether iconv is available on the server.
2394
   *
2395
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2396
   */
2397
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so it can be returned as-is.
    return \extension_loaded('iconv');
  }
2401
2402
  /**
2403
   * alias for "UTF8::decimal_to_chr()"
2404
   *
2405
   * @see UTF8::decimal_to_chr()
2406
   *
2407
   * @param mixed $int
2408
   *
2409
   * @return string
2410
   */
2411 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the work is done by decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2415
2416
  /**
2417
   * Converts Integer to hexadecimal U+xxxx code point representation.
2418
   *
2419
   * INFO: opposite to UTF8::hex_to_int()
2420
   *
2421
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2422
   * @param string $pfix [optional]
2423
   *
2424
   * @return string <p>The code point, or empty string on failure.</p>
2425
   */
2426 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Lower-case hex digits, left-padded with zeros to at least four digits;
    // longer values are left untouched.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2434
2435
  /**
2436
   * Checks whether intl-char is available on the server.
2437
   *
2438
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2439
   */
2440
  public static function intlChar_loaded(): bool
  {
    // Availability is detected via the presence of the IntlChar class.
    $available = \class_exists('IntlChar');

    return $available;
  }
2444
2445
  /**
2446
   * Checks whether intl is available on the server.
2447
   *
2448
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2449
   */
2450 3
  public static function intl_loaded(): bool
  {
    // Availability is detected via the "intl" extension being loaded.
    $available = \extension_loaded('intl');

    return $available;
  }
2454
2455
  /**
2456
   * alias for "UTF8::is_ascii()"
2457
   *
2458
   * @see        UTF8::is_ascii()
2459
   *
2460
   * @param string $str
2461
   *
2462
   * @return bool
2463
   *
2464
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2465
   */
2466 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias; forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2470
2471
  /**
2472
   * alias for "UTF8::is_base64()"
2473
   *
2474
   * @see        UTF8::is_base64()
2475
   *
2476
   * @param string $str
2477
   *
2478
   * @return bool
2479
   *
2480
   * @deprecated <p>use "UTF8::is_base64()"</p>
2481
   */
2482 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias; forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2486
2487
  /**
2488
   * alias for "UTF8::is_binary()"
2489
   *
2490
   * @see        UTF8::is_binary()
2491
   *
2492
   * @param mixed $str
2493
   * @param bool  $strict
2494
   *
2495
   * @return bool
2496
   *
2497
   * @deprecated <p>use "UTF8::is_binary()"</p>
2498
   */
2499 2
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias; both arguments are forwarded as given to is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
  }
2503
2504
  /**
2505
   * alias for "UTF8::is_bom()"
2506
   *
2507
   * @see        UTF8::is_bom()
2508
   *
2509
   * @param string $utf8_chr
2510
   *
2511
   * @return bool
2512
   *
2513
   * @deprecated <p>use "UTF8::is_bom()"</p>
2514
   */
2515 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias; forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2519
2520
  /**
2521
   * alias for "UTF8::is_html()"
2522
   *
2523
   * @see        UTF8::is_html()
2524
   *
2525
   * @param string $str
2526
   *
2527
   * @return bool
2528
   *
2529
   * @deprecated <p>use "UTF8::is_html()"</p>
2530
   */
2531 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias; forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2535
2536
  /**
2537
   * alias for "UTF8::is_json()"
2538
   *
2539
   * @see        UTF8::is_json()
2540
   *
2541
   * @param string $str
2542
   *
2543
   * @return bool
2544
   *
2545
   * @deprecated <p>use "UTF8::is_json()"</p>
2546
   */
2547
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias; forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2551
2552
  /**
2553
   * alias for "UTF8::is_utf16()"
2554
   *
2555
   * @see        UTF8::is_utf16()
2556
   *
2557
   * @param string $str
2558
   *
2559
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2560
   *
2561
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2562
   */
2563 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf16()
    // (false, 1 for UTF-16LE or 2 for UTF-16BE).
    $result = self::is_utf16($str);

    return $result;
  }
2567
2568
  /**
2569
   * alias for "UTF8::is_utf32()"
2570
   *
2571
   * @see        UTF8::is_utf32()
2572
   *
2573
   * @param string $str
2574
   *
2575
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2576
   *
2577
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2578
   */
2579 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf32()
    // (false, 1 for UTF-32LE or 2 for UTF-32BE).
    $result = self::is_utf32($str);

    return $result;
  }
2583
2584
  /**
2585
   * alias for "UTF8::is_utf8()"
2586
   *
2587
   * @see        UTF8::is_utf8()
2588
   *
2589
   * @param string $str
2590
   * @param bool   $strict
2591
   *
2592
   * @return bool
2593
   *
2594
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2595
   */
2596 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias; both arguments are forwarded as given to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2600
2601
  /**
2602
   * Returns true if the string contains only alphabetic chars, false otherwise.
2603
   *
2604
   * @param string $str
2605
   *
2606
   * @return bool <p>Whether or not $str contains only alphabetic chars.</p>
2607
   */
2608 10
  public static function is_alpha(string $str): bool
  {
    // Anchored POSIX class: every character must be alphabetic. The "*"
    // quantifier means the pattern itself also admits an empty string.
    $pattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2612
2613
  /**
2614
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2615
   *
2616
   * @param string $str
2617
   *
2618
   * @return bool <p>Whether or not $str contains only alphanumeric chars.</p>
2619
   */
2620 13
  public static function is_alphanumeric(string $str): bool
  {
    // Anchored POSIX class: every character must be a letter or a digit. The
    // "*" quantifier means the pattern itself also admits an empty string.
    $pattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2624
2625
  /**
2626
   * Checks if a string is 7 bit ASCII.
2627
   *
2628
   * @param string $str <p>The string to check.</p>
2629
   *
2630
   * @return bool <p>
2631
   *              <strong>true</strong> if it is ASCII<br>
2632
   *              <strong>false</strong> otherwise
2633
   *              </p>
2634
   */
2635 101
  public static function is_ascii(string $str): bool
  {
    // Allowed bytes: TAB (\x09), LF (\x0A), CR (\x0D), printable \x20-\x7E,
    // plus \x10 and \x13.
    // NOTE(review): \x10 (DLE) and \x13 (DC3) look like decimal 10/13 written
    // as hex; LF/CR are already listed as \x0A/\x0D. Kept as-is because
    // callers may rely on the current behaviour — confirm before changing.
    $hasDisallowedByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    // The empty string short-circuits to true without consulting the regex result.
    return '' === $str || !$hasDisallowedByte;
  }
2643
2644
  /**
2645
   * Returns true if the string is base64 encoded, false otherwise.
2646
   *
2647
   * @param string $str <p>The input string.</p>
2648
   *
2649
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2650
   */
2651 8
  public static function is_base64(string $str): bool
  {
    // An empty string is never reported as base64 (unchanged behaviour).
    if ($str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false when $str contains
    // characters outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare against false explicitly instead of relying on truthiness:
    // a payload of "0" is a falsy string in PHP, so valid input such as
    // 'MA==' was previously rejected.
    // The round-trip check rejects non-canonical input (e.g. missing padding).
    return \base64_encode($decoded) === $str;
  }
2657
2658
  /**
2659
   * Check if the input is binary... (this looks like a hack).
2660
   *
2661
   * @param mixed $input
2662
   * @param bool  $strict
2663
   *
2664
   * @return bool
2665
   */
2666 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;

    // An empty string is reported as "not binary".
    if ($input === '') {
      return false;
    }

    // A string made only of "0" and "1" characters is reported as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // More than 30% NUL bytes is reported as binary. The length is never zero
    // here because the empty string was handled above.
    $nullBytes = \substr_count($input, "\x0");
    if (($nullBytes / \strlen($input)) > 0.3) {
      return true;
    }

    // Non-strict mode: any NUL byte at all is enough.
    if ($strict !== true) {
      return $nullBytes > 0;
    }

    // Strict mode: ask fileinfo for the MIME encoding instead.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(FILEINFO_MIME_ENCODING);

    return $detector->buffer($input) === 'binary';
  }
2711
2712
  /**
2713
   * Check if the file is binary.
2714
   *
2715
   * @param string $file
2716
   *
2717
   * @return bool
2718
   */
2719 3
  public static function is_binary_file($file): bool
  {
    // Bytes sampled from the start of the file; stays empty when the file
    // cannot be opened or read.
    $block = '';

    try {
      $fp = \fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle must be checked before it is passed to fread()/fclose().
      if ($fp !== false) {
        try {
          $chunk = \fread($fp, 512);
          if ($chunk !== false) {
            $block = $chunk;
          }
        } finally {
          // Always release the handle, even if reading throws.
          \fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // Best effort, e.g. when an error handler turns warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block, true);
  }
2731
2732
  /**
2733
   * Returns true if the string contains only whitespace chars, false otherwise.
2734
   *
2735
   * @param string $str
2736
   *
2737
   * @return bool <p>Whether or not $str contains only whitespace characters.</p>
2738
   */
2739 15
  public static function is_blank(string $str): bool
  {
    // Anchored POSIX class: every character must be whitespace. The "*"
    // quantifier means the pattern itself also admits an empty string.
    $pattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2743
2744
  /**
2745
   * Checks if the given string is equal to any "Byte Order Mark".
2746
   *
2747
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2748
   *
2749
   * @param string $str <p>The input string.</p>
2750
   *
2751
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2752
   */
2753 1
  public static function is_bom($str): bool
  {
    // self::$BOM maps each BOM byte string to its byte length; only the keys
    // matter here. The comparison is strict, so non-string input never matches.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2763
2764
  /**
2765
   * Determine whether the string is considered to be empty.
2766
   *
2767
   * A variable is considered empty if it does not exist or if its value equals FALSE.
2768
   * empty() does not generate a warning if the variable does not exist.
2769
   *
2770
   * @param mixed $str
2771
   *
2772
   * @return bool <p>Whether or not $str is empty().</p>
2773
   */
2774
  public static function is_empty($str): bool
  {
    // For a parameter that is always defined, empty() reduces to a falsiness
    // test: '', '0', 0, 0.0, null, false and [] all count as empty.
    return (bool)$str === false;
  }
2778
2779
  /**
2780
   * Returns true if the string contains only hexadecimal chars, false otherwise.
2781
   *
2782
   * @param string $str
2783
   *
2784
   * @return bool <p>Whether or not $str contains only hexadecimal chars.</p>
2785
   */
2786 13
  public static function is_hexadecimal(string $str): bool
  {
    // Anchored POSIX class: every character must be a hex digit. The "*"
    // quantifier means the pattern itself also admits an empty string.
    $pattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2790
2791
  /**
2792
   * Check if the string contains any html-tags <lall>.
2793
   *
2794
   * @param string $str <p>The input string.</p>
2795
   *
2796
   * @return bool
2797
   */
2798 2
  public static function is_html(string $str): bool
  {
    // An empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Looks for the first opening, closing or self-closing tag, with optional
    // attributes (quoted or unquoted values).
    $found = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // Any captured match means at least one tag is present.
    return \count($found) !== 0;
  }
2811
2812
  /**
2813
   * Try to check if "$str" is a JSON string.
2814
   *
2815
   * @param string $str <p>The input string.</p>
2816
   *
2817
   * @return bool
2818
   */
2819 21
  public static function is_json(string $str): bool
  {
    // An empty string is never reported as JSON.
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only documents that decode to an object or an array qualify; bare
    // scalars do not.
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
2844
2845
  /**
2846
   * @param string $str
2847
   *
2848
   * @return bool
2849
   */
2850 8
  public static function is_lowercase(string $str): bool
  {
    // Anchored POSIX class: every character must be lower case. The "*"
    // quantifier means the pattern itself also admits an empty string.
    $pattern = '^[[:lower:]]*$';

    // The original if/else collapses to a plain true/false on the truthiness
    // of the helper's result.
    return self::str_matches_pattern($str, $pattern) ? true : false;
  }
2858
2859
  /**
2860
   * Returns true if the string is serialized, false otherwise.
2861
   *
2862
   * @param string $str
2863
   *
2864
   * @return bool <p>Whether or not $str is serialized.</p>
2865
   */
2866 7
  public static function is_serialized(string $str): bool
  {
    // An empty string is never reported as serialized.
    if ($str === '') {
      return false;
    }

    // serialize(false) yields 'b:0;', which unserialize() turns back into
    // false — indistinguishable from its failure value, so handle it up front.
    if ($str === 'b:0;') {
      return true;
    }

    // This is only a probe, so no class may be instantiated: with
    // 'allowed_classes' => false, objects come back as __PHP_Incomplete_Class
    // and no __wakeup()/__destruct() of application classes can run on
    // attacker-controlled input. The result is still "!== false" for any valid
    // payload, so the return value is unchanged.
    // "@" silences the notice unserialize() raises for malformed input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection UnserializeExploitsInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2878
2879
  /**
2880
   * Returns true if the string contains only upper case chars, false
2881
   * otherwise.
2882
   *
2883
   * @param string $str <p>The input string.</p>
2884
   *
2885
   * @return bool <p>Whether or not $str contains only upper case characters.</p>
2886
   */
2887 8
  public static function is_uppercase(string $str): bool
  {
    // Anchored POSIX class: every character must be upper case. The "*"
    // quantifier means the pattern itself also admits an empty string.
    $pattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2891
2892
  /**
2893
   * Check if the string is UTF-16.
2894
   *
2895
   * @param string $str <p>The input string.</p>
2896
   *
2897
   * @return int|false <p>
2898
   *                   <strong>false</strong> if it's not UTF-16,<br>
2899
   *                   <strong>1</strong> for UTF-16LE,<br>
2900
   *                   <strong>2</strong> for UTF-16BE.
2901
   *                   </p>
2902
   */
2903 10
  public static function is_utf16(string $str)
  {
    // Only input that is_binary() (non-strict) flags is considered at all.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input, computed lazily on first use.
    $sourceChars = [];

    // Score per byte order; little endian is evaluated first, as before.
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // The candidate only scores if decode -> encode -> decode is stable.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($sourceChars) === 0) {
        $sourceChars = self::count_chars($str, true);
      }

      // NOTE(review): $char is a *key* of count_chars()'s result, while
      // in_array() searches the *values* of $sourceChars — confirm this is
      // intended against count_chars()'s return shape.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $sourceChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
2958
2959
  /**
2960
   * Check if the string is UTF-32.
2961
   *
2962
   * @param string $str
2963
   *
2964
   * @return int|false <p>
2965
   *                   <strong>false</strong> if it's not UTF-32,<br>
2966
   *                   <strong>1</strong> for UTF-32LE,<br>
2967
   *                   <strong>2</strong> for UTF-32BE.
2968
   *                   </p>
2969
   */
2970 8
  public static function is_utf32(string $str)
  {
    // Only input that is_binary() (non-strict) flags is considered at all.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input, computed lazily on first use.
    $sourceChars = [];

    // Score per byte order; little endian is evaluated first, as before.
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // The candidate only scores if decode -> encode -> decode is stable.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($sourceChars) === 0) {
        $sourceChars = self::count_chars($str, true);
      }

      // NOTE(review): $char is a *key* of count_chars()'s result, while
      // in_array() searches the *values* of $sourceChars — confirm this is
      // intended against count_chars()'s return shape.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $sourceChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3025
3026
  /**
3027
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3028
   *
3029
   * @see    http://hsivonen.iki.fi/php-utf8/
3030
   *
3031
   * @param string|string[] $str    <p>The string to be checked.</p>
3032
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3033
   *
3034
   * @return bool
3035
   */
3036 92
  public static function is_utf8($str, bool $strict = false): bool
3037
  {
3038 92
    if (\is_array($str) === true) {
3039 1
      foreach ($str as $k => $v) {
3040 1
        if (false === self::is_utf8($v, $strict)) {
3041 1
          return false;
3042
        }
3043
      }
3044
3045
      return true;
3046
    }
3047
3048 92
    if ('' === $str) {
3049 11
      return true;
3050
    }
3051
3052 88
    if ($strict === true) {
3053 1
      if (self::is_utf16($str) !== false) {
3054 1
        return false;
3055
      }
3056
3057
      if (self::is_utf32($str) !== false) {
3058
        return false;
3059
      }
3060
    }
3061
3062 88
    if (self::pcre_utf8_support() !== true) {
3063
3064
      // If even just the first character can be matched, when the /u
3065
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
3066
      // invalid, nothing at all will match, even if the string contains
3067
      // some valid sequences
3068
      return (\preg_match('/^.{1}/us', $str, $ar) === 1);
3069
    }
3070
3071 88
    $mState = 0; // cached expected number of octets after the current octet
3072
    // until the beginning of the next UTF8 character sequence
3073 88
    $mUcs4 = 0; // cached Unicode character
3074 88
    $mBytes = 1; // cached expected number of octets in the current sequence
3075
3076 88
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
3077
      self::checkForSupport();
3078
    }
3079
3080 88
    if (self::$ORD === null) {
3081
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type integer or string or boolean. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
3082
    }
3083
3084 88
    $len = self::strlen_in_byte((string)$str);
3085
    /** @noinspection ForeachInvariantsInspection */
3086 88
    for ($i = 0; $i < $len; $i++) {
3087 88
      $in = self::$ORD[$str[$i]];
3088 88
      if ($mState === 0) {
3089
        // When mState is zero we expect either a US-ASCII character or a
3090
        // multi-octet sequence.
3091 88
        if (0 === (0x80 & $in)) {
3092
          // US-ASCII, pass straight through.
3093 85
          $mBytes = 1;
3094 69
        } elseif (0xC0 === (0xE0 & $in)) {
3095
          // First octet of 2 octet sequence.
3096 62
          $mUcs4 = $in;
3097 62
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
3098 62
          $mState = 1;
3099 62
          $mBytes = 2;
3100 46
        } elseif (0xE0 === (0xF0 & $in)) {
3101
          // First octet of 3 octet sequence.
3102 30
          $mUcs4 = $in;
3103 30
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
3104 30
          $mState = 2;
3105 30
          $mBytes = 3;
3106 23
        } elseif (0xF0 === (0xF8 & $in)) {
3107
          // First octet of 4 octet sequence.
3108 13
          $mUcs4 = $in;
3109 13
          $mUcs4 = ($mUcs4 & 0x07) << 18;
3110 13
          $mState = 3;
3111 13
          $mBytes = 4;
3112 11
        } elseif (0xF8 === (0xFC & $in)) {
3113
          /* First octet of 5 octet sequence.
3114
          *
3115
          * This is illegal because the encoded codepoint must be either
3116
          * (a) not the shortest form or
3117
          * (b) outside the Unicode range of 0-0x10FFFF.
3118
          * Rather than trying to resynchronize, we will carry on until the end
3119
          * of the sequence and let the later error handling code catch it.
3120
          */
3121 4
          $mUcs4 = $in;
3122 4
          $mUcs4 = ($mUcs4 & 0x03) << 24;
3123 4
          $mState = 4;
3124 4
          $mBytes = 5;
3125 8
        } elseif (0xFC === (0xFE & $in)) {
3126
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
3127 4
          $mUcs4 = $in;
3128 4
          $mUcs4 = ($mUcs4 & 1) << 30;
3129 4
          $mState = 5;
3130 4
          $mBytes = 6;
3131
        } else {
3132
          /* Current octet is neither in the US-ASCII range nor a legal first
3133
           * octet of a multi-octet sequence.
3134
           */
3135 88
          return false;
3136
        }
3137
      } else {
3138
        // When mState is non-zero, we expect a continuation of the multi-octet
3139
        // sequence
3140 69
        if (0x80 === (0xC0 & $in)) {
3141
          // Legal continuation.
3142 63
          $shift = ($mState - 1) * 6;
3143 63
          $tmp = $in;
3144 63
          $tmp = ($tmp & 0x0000003F) << $shift;
3145 63
          $mUcs4 |= $tmp;
3146
          /**Prefix
3147
           * End of the multi-octet sequence. mUcs4 now contains the final
3148
           * Unicode code point to be output
3149
           */
3150 63
          if (0 === --$mState) {
3151
            /*
3152
            * Check for illegal sequences and code points.
3153
            */
3154
            // From Unicode 3.1, non-shortest form is illegal
3155
            if (
3156 63
                (2 === $mBytes && $mUcs4 < 0x0080) ||
3157 63
                (3 === $mBytes && $mUcs4 < 0x0800) ||
3158 63
                (4 === $mBytes && $mUcs4 < 0x10000) ||
3159 63
                (4 < $mBytes) ||
3160
                // From Unicode 3.2, surrogate characters are illegal.
3161 63
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
3162
                // Code points outside the Unicode range are illegal.
3163 63
                ($mUcs4 > 0x10FFFF)
3164
            ) {
3165 7
              return false;
3166
            }
3167
            // initialize UTF8 cache
3168 63
            $mState = 0;
3169 63
            $mUcs4 = 0;
3170 63
            $mBytes = 1;
3171
          }
3172
        } else {
3173
          /**
3174
           *((0xC0 & (*in) != 0x80) && (mState != 0))
3175
           * Incomplete multi-octet sequence.
3176
           */
3177 28
          return false;
3178
        }
3179
      }
3180
    }
3181
3182 56
    return true;
3183
  }
3184
3185
  /**
3186
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3187
   * Decodes a JSON string
3188
   *
3189
   * @link http://php.net/manual/en/function.json-decode.php
3190
   *
3191
   * @param string $json    <p>
3192
   *                        The <i>json</i> string being decoded.
3193
   *                        </p>
3194
   *                        <p>
3195
   *                        This function only works with UTF-8 encoded strings.
3196
   *                        </p>
3197
   *                        <p>PHP implements a superset of
3198
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3199
   *                        only supports these values when they are nested inside an array or an object.
3200
   *                        </p>
3201
   * @param bool   $assoc   [optional] <p>
3202
   *                        When <b>TRUE</b>, returned objects will be converted into
3203
   *                        associative arrays.
3204
   *                        </p>
3205
   * @param int    $depth   [optional] <p>
3206
   *                        User specified recursion depth.
3207
   *                        </p>
3208
   * @param int    $options [optional] <p>
3209
   *                        Bitmask of JSON decode options. Currently only
3210
   *                        <b>JSON_BIGINT_AS_STRING</b>
3211
   *                        is supported (default is to cast large integers as floats)
3212
   *                        </p>
3213
   *
3214
   * @return mixed the value encoded in <i>json</i> in appropriate
3215
   * PHP type. Values true, false and
3216
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3217
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3218
   * <i>json</i> cannot be decoded or if the encoded
3219
   * data is deeper than the recursion limit.
3220
   */
3221 21
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // Pass the raw input through self::filter() before handing it to the native decoder.
    $filteredJson = self::filter($json);

    // Populate the feature-detection table on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing to delegate to.
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3238
3239
  /**
3240
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3241
   * Returns the JSON representation of a value.
3242
   *
3243
   * @link http://php.net/manual/en/function.json-encode.php
3244
   *
3245
   * @param mixed $value   <p>
3246
   *                       The <i>value</i> being encoded. Can be any type except
3247
   *                       a resource.
3248
   *                       </p>
3249
   *                       <p>
3250
   *                       All string data must be UTF-8 encoded.
3251
   *                       </p>
3252
   *                       <p>PHP implements a superset of
3253
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3254
   *                       only supports these values when they are nested inside an array or an object.
3255
   *                       </p>
3256
   * @param int   $options [optional] <p>
3257
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3258
   *                       <b>JSON_HEX_TAG</b>,
3259
   *                       <b>JSON_HEX_AMP</b>,
3260
   *                       <b>JSON_HEX_APOS</b>,
3261
   *                       <b>JSON_NUMERIC_CHECK</b>,
3262
   *                       <b>JSON_PRETTY_PRINT</b>,
3263
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3264
   *                       <b>JSON_FORCE_OBJECT</b>,
3265
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3266
   *                       constants is described on
3267
   *                       the JSON constants page.
3268
   *                       </p>
3269
   * @param int   $depth   [optional] <p>
3270
   *                       Set the maximum depth. Must be greater than zero.
3271
   *                       </p>
3272
   *
3273
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3274
   */
3275 2
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Pass the value through self::filter() before handing it to the native encoder.
    $value = self::filter($value);

    // Populate the feature-detection table on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    // \json_encode() returns false on failure (e.g. when $depth is exceeded), which is
    // the contract documented for this method ("a JSON encoded string on success or
    // FALSE on failure"). A declared ": string" return type would turn that documented
    // false into a TypeError under strict_types, so no return type is declared here.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($value, $options, $depth);
  }
3292
3293
  /**
3294
   * Checks whether JSON is available on the server.
3295
   *
3296
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3297
   */
3298
  public static function json_loaded(): bool
  {
    // The presence of the native json_decode() function is used as the availability probe.
    $jsonIsAvailable = \function_exists('json_decode');

    return $jsonIsAvailable;
  }
3302
3303
  /**
3304
   * Makes string's first char lowercase.
3305
   *
3306
   * @param string $str       <p>The input string</p>
3307
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
3308
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3309
   *
3310
   * @return string <p>The resulting string</p>
3311
   */
3312 44
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character is carried over unchanged.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // Only the leading character is lower-cased.
    $head = self::strtolower(
        (string)self::substr($str, 0, 1, $encoding, $cleanUtf8),
        $encoding,
        $cleanUtf8
    );

    // substr() may report failure with false; treat that as "no remainder".
    return $head . ($tail === false ? '' : $tail);
  }
3327
3328
  /**
3329
   * alias for "UTF8::lcfirst()"
3330
   *
3331
   * @see UTF8::lcfirst()
3332
   *
3333
   * @param string $str
3334
   * @param string $encoding
3335
   * @param bool   $cleanUtf8
3336
   *
3337
   * @return string
3338
   */
3339 1
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: every argument is forwarded unchanged to lcfirst().
    $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowerCasedFirst;
  }
3343
3344
  /**
3345
   * Lowercase for all words in the string.
3346
   *
3347
   * @param string   $str        <p>The input string.</p>
3348
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3349
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3350
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3351
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3352
   *
3353
   * @return string
3354
   */
3355 1
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against the empty string explicitly: a loose "!$str" test would also
    // reject the valid input "0" and return an empty result for it.
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {
      // Skip only truly empty fragments; "0" is a legitimate fragment and must
      // survive, otherwise it would silently vanish from the re-joined string.
      if ((string)$word === '') {
        continue;
      }

      // Words listed in $exceptions are passed through untouched.
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The fragments are glued back together without any separator.
    return \implode('', $newWords);
  }
3393
3394
  /**
3395
   * alias for "UTF8::lcfirst()"
3396
   *
3397
   * @see UTF8::lcfirst()
3398
   *
3399
   * @param string $str
3400
   * @param string $encoding
3401
   * @param bool   $cleanUtf8
3402
   *
3403
   * @return string
3404
   */
3405 5
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: every argument is forwarded unchanged to lcfirst().
    $lowerCasedFirst = self::lcfirst($str, $encoding, $cleanUtf8);

    return $lowerCasedFirst;
  }
3409
3410
  /**
3411
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3412
   *
3413
   * @param string $str   <p>The string to be trimmed</p>
3414
   * @param mixed  $chars <p>Optional characters to be stripped</p>
3415
   *
3416
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3417
   */
3418 21
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Decide whether the caller supplied a character list.
    // For strings only the empty string counts as "not given": a loose "!$chars"
    // test would also discard the valid list "0", so ltrim('007', '0') would strip
    // whitespace instead of zeros. Non-string values keep the previous semantics
    // (INF or any falsy value selects the default pattern).
    if (\is_string($chars)) {
      $hasCharList = ($chars !== '');
    } else {
      $hasCharList = ($chars !== INF && (bool)$chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCharList === false) {
      // Default: strip leading characters of the Unicode "separator" and "other" categories.
      $pattern = "^[\pZ\pC]+";
    } else {
      // Escape the list so it can be embedded safely in a character class.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3434
3435
  /**
3436
   * Returns the UTF-8 character with the maximum code point in the given data.
3437
   *
3438
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3439
   *
3440
   * @return string <p>The character with the highest code point.</p>
3441
   */
3442 1
  public static function max($arg): string
  {
    // An array of strings is flattened into a single string first.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty list: it emits a warning and returns
    // false on PHP 7 and throws a ValueError on PHP 8. No characters means no maximum.
    if (empty($codepoints)) {
      return '';
    }

    // Static analysis reports that chr() may yield null; the cast keeps the
    // declared string return type intact in that case.
    return (string)self::chr(\max($codepoints));
  }
3450
3451
  /**
3452
   * Calculates and returns the maximum number of bytes taken by any
3453
   * UTF-8 encoded character in the given string.
3454
   *
3455
   * @param string $str <p>The original Unicode string.</p>
3456
   *
3457
   * @return int <p>Max byte lengths of the given chars.</p>
3458
   */
3459 1
  public static function max_chr_width(string $str): int
  {
    // One entry per character, as produced by chr_size_list().
    $sizes = self::chr_size_list($str);

    // No entries means there is no width to report.
    if (\count($sizes) === 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
3468
3469
  /**
3470
   * Checks whether mbstring is available on the server.
3471
   *
3472
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3473
   */
3474 11
  public static function mbstring_loaded(): bool
  {
    // Without the extension there is nothing to configure.
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    // Side effect: switch mbstring's internal encoding to UTF-8 whenever the extension is present.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3484
3485
  /**
3486
   * Checks whether mbstring "overloaded" is active on the server.
3487
   *
3488
   * @return bool
3489
   */
3490
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // If the flag constant is not defined, overloading cannot be active.
    /** @noinspection PhpComposerExtensionStubsInspection */
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    // Test the string-overload bit of the configured bitmask.
    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3502
3503
  /**
3504
   * Returns the UTF-8 character with the minimum code point in the given data.
3505
   *
3506
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3507
   *
3508
   * @return string <p>The character with the lowest code point.</p>
3509
   */
3510 1
  public static function min($arg): string
  {
    // An array of strings is flattened into a single string first.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty list: it emits a warning and returns
    // false on PHP 7 and throws a ValueError on PHP 8. No characters means no minimum.
    if (empty($codepoints)) {
      return '';
    }

    // Static analysis reports that chr() may yield null; the cast keeps the
    // declared string return type intact in that case.
    return (string)self::chr(\min($codepoints));
  }
3518
3519
  /**
3520
   * alias for "UTF8::normalize_encoding()"
3521
   *
3522
   * @see        UTF8::normalize_encoding()
3523
   *
3524
   * @param string $encoding
3525
   * @param mixed  $fallback
3526
   *
3527
   * @return mixed
3528
   *
3529
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3530
   */
3531 1
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged.
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3535
3536
  /**
3537
   * Normalize the encoding-"name" input.
3538
   *
3539
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3540
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3541
   *
3542
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by
3543
   *                default)</p>
3544
   */
3545 322
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    // Per-request memo: input spelling => normalized name.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // Nothing to normalize: hand back the caller's fallback.
    if (!$encoding) {
      return $fallback;
    }

    // Fast path for the two most common UTF-8 spellings.
    if ('UTF-8' === $encoding || 'UTF8' === $encoding) {
      return 'UTF-8';
    }

    // Already resolved earlier in this request.
    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Load the list of known encoding names once.
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // An exact (case-sensitive) hit in the known list needs no rewriting.
    if (\in_array($encoding, /** @scrutinizer ignore-type */ self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    // Build the lookup key: upper-case, with everything except
    // ASCII letters, digits and whitespace removed.
    $upperCased = \strtoupper($encoding);
    $lookupKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $equivalences = [
        // ISO-8859-1: Western European
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        // ISO-8859-2: Central European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2',
        // ISO-8859-3: Southern European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3',
        // ISO-8859-4: Northern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'    => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'    => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'    => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'    => 'ISO-8859-8',
        // ISO-8859-9: Turkish
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9',
        // ISO-8859-11: Thai
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11',
        // ISO-8859-10: Nordic
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10',
        // ISO-8859-13: Baltic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13',
        // ISO-8859-14: Celtic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14',
        // ISO-8859-15: Western European (with some extra chars e.g. €)
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15',
        // ISO-8859-16: Southeast European
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16',
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // Generic byte-oriented names
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Known alias => canonical name; anything else is returned upper-cased.
    $normalized = !empty($equivalences[$lookupKey])
        ? $equivalences[$lookupKey]
        : $upperCased;

    // Memoize under the spelling the caller originally used.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3653
3654
  /**
3655
   * Normalize some MS Word special characters.
3656
   *
3657
   * @param string $str <p>The string to be normalized.</p>
3658
   *
3659
   * @return string
3660
   */
3661 36
  public static function normalize_msword(string $str): string
  {
    // Search/replace lists are derived once per request and then reused.
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ('' === $str) {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      // Load the replacement map on first use.
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      // Map keys are the characters to search for, map values their replacements.
      $UTF8_MSWORD_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$UTF8_MSWORD);
    }

    $normalized = \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);

    return $normalized;
  }
3682
3683
  /**
   * Normalize the whitespace: every entry of the whitespace table is replaced by a plain space.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one whitespace list per "keep non-breaking-space" mode (index 0 / 1)
    static $whitespaceByMode = [];
    static $bidiControls = null;

    if ($str === '') {
      return '';
    }

    $mode = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceByMode[$mode])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceByMode[$mode] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi control chars are removed, not replaced by a space
      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($whitespaceByMode[$mode], ' ', $str);
  }
3725
3726
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    // results are memoized per (original input + normalized encoding)
    static $codePointCache = [];

    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // anything that is still not UTF-8 is converted before decoding
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($codePointCache[$cacheKey]) === true) {
      return $codePointCache[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $codePointCache[$cacheKey] = $code;
      }
      // a falsy result falls through to the manual decoding below
    }

    // manual decoding: look at (up to) the first four bytes, 1-indexed by unpack()
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing readable => 0)
      $code = $lead;
    }

    $codePointCache[$cacheKey] = $code;

    return $code;
  }
3792
3793
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never sets variables in the current scope;
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    if (\mb_parse_str($input, $result) === false) {
      return false;
    }

    // a successful parse with nothing in it still counts as a failure
    return !empty($result);
  }
3818
3819
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern with the "u" modifier only matches when PCRE was built with UTF-8 support
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return (bool)$matched;
  }
3829
3830
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if ext-ctype is not available
   *
   * @return string[]
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // decimal digits => code point, hex digits => hex_to_int(), anything else => ord()
    $toCodePoint = static function ($value) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit((string)$value)) {
        return (int)$value;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($value)) {
        return (int)self::hex_to_int($value);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3886
3887
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hex html entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // decode until the string is stable (or only once, if multi-decoding is disabled)
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3935
3936
  /**
   * Filter a list of strings: optionally drop short values and / or blank values.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int   $removeShortValues <p>Drop values with a length less than or equal to this; null disables the check.</p>
   *
   * @return array <p>The remaining values, re-indexed.</p>
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true
                 &&
                 \trim($candidate) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
3970
3971
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always compiled with the "u" (UTF-8) modifier.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern.</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option string "msr" is mapped to "ms" before it is used as preg modifiers
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback for an empty delimiter
    $wrap = $delimiter ?: '/';

    $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4001
4002
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4017
4018
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM table (BOM => byte length) is checked in turn; a BOM found
   * at byte offset 0 is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // 'CP850' makes strpos() / substr() work on bytes
      if (self::strpos($str, $bomString, 0, 'CP850') !== 0) {
        continue;
      }

      $rest = self::substr($str, $bomByteLength, null, 'CP850');
      $str = $rest === false ? '' : (string)$rest;
    }

    return $str;
  }
4043
4044
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "  " for ' ') is collapsed into a single $what.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // nothing to collapse for an empty needle
        if ($item === '') {
          continue;
        }

        // The replacement is the back-reference '$1' (the captured, literal $item) and not
        // $item itself: preg_replace() would otherwise interpret "$0", "\0", "\\" ... inside
        // $item as replacement syntax and produce a wrong result.
        $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4067
4068
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
   *                              not be stripped. Default: null
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = null): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4082
4083
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
   * Only real "<br ...>" tags are matched; other tags that merely start with "br"
   * (e.g. "<brand>") are left untouched.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is a empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // "\r\n" must be tried before the single "\r" / "\n" alternatives;
    // "<br\b[^>]*>" covers <br>, <br/>, <br /> and <br class="...">, in any letter case
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
4095
4096
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * NOTE: the replacement runs in a loop until nothing matches anymore, so $replacement
   *       must not itself contain one of the removed characters / sequences.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%00") of these characters.</p>
   * @param string $replacement [optional] <p>The string each match is replaced with. Default: ''</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of percent-encoding are case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because a removal can join the remaining parts into a new match (e.g. "%0%000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4129
4130
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>string without the prefix $substring.</p>
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // "substr()" may return false (e.g. nothing is left after the prefix);
    // the cast keeps the declared string return type intact
    return (string)self::substr(
        $str,
        (int)self::strlen($substring, $encoding),
        null,
        $encoding
    );
  }
4153
4154
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String having a $str without the suffix $substring.</p>
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // The lengths above are measured in $encoding, so the cut has to use the same encoding.
    // "substr()" may return false (e.g. nothing is left); the cast keeps the string return type.
    return (string)self::substr(
        $str,
        0,
        $keepLength,
        $encoding
    );
  }
4176
4177
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    // delegate to the case-sensitive or the case-insensitive replace helper
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4195
4196
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    // delegate to the case-sensitive or the case-insensitive replace helper
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4214
4215
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
      // never an arbitrary string. So invalid bytes are either dropped ("none") or turned
      // into U+FFFD, which the final "str_replace()" then maps to $replacementChar.
      $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      // restore the previous (global) substitute setting
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4264
4265
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped. Without it (or with an empty value),
   *                      separator- and control-characters (\pZ, \pC) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // the string '0' is falsy in PHP, but it is a perfectly valid character to strip
    $hasCustomChars = $chars !== INF && ($chars || $chars === '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCustomChars) {
      $chars = \preg_quote($chars, '/');
      $pattern = "[$chars]+\$";
    } else {
      $pattern = "[\pZ\pC]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4289
4290
  /**
   * Build a regex fragment that matches any of the characters of $s.
   *
   * Characters are collected into one bracket class (seeded with $class); split results
   * that do not fit into the class become separate alternatives of a non-capturing group.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket class.</p>
   *
   * @return string <p>"[...]" or "(?:[...]|...)"</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $fragmentCache = [];

    $cacheKey = $s . $class;

    if (isset($fragmentCache[$cacheKey])) {
      return $fragmentCache[$cacheKey];
    }

    // index 0 is the bracket class, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // the dash is put in front of the class content
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $fragment = 1 === \count($parts)
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $fragmentCache[$cacheKey] = $fragment;

    return $fragment;
  }
4338
4339
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes one "key - value" line per entry of the support table, wrapped in a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $output .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4354
4355
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The HTML numbered entity.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    // ASCII input is passed through untouched, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4384
4385
  /**
   * Replace every run of $tabLength spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4395
4396
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($str === '') {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = [];

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // PCRE with UTF-8 support: every "." match is one character
      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the UTF-8 lead byte pattern

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {

        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single byte (ASCII)
          $chars[] = $lead;

        } elseif (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx: lead byte of a 2-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

        } elseif (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx: lead byte of a 3-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

        } elseif (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx: lead byte of a 4-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }

        }
        // a byte that fits no pattern (or has broken continuation bytes) adds nothing
      }
    }

    if ($length > 1) {
      // glue $length characters together per element
      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4513
4514
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $trimmed = self::trim($str);
    $str = self::lcfirst($trimmed, $encoding);

    // leading dashes / underscores are dropped
    $str = (string)\preg_replace('/^[-_]+/', '', $str);

    // a separator run is removed and the character after it (if any) is upper-cased
    $upperAfterSeparator = static function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperAfterSeparator, $str);

    // a digit run stays in place, the character after it (if any) is upper-cased
    $upperAfterDigits = static function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };
    $str = (string)\preg_replace_callback('/[\d]+(.)?/u', $upperAfterDigits, $str);

    return $str;
  }
4551
4552
  /**
4553
   * Returns the string with the first letter of each word capitalized,
4554
   * except for when the word is a name which shouldn't be capitalized.
4555
   *
4556
   * @param string $str
4557
   *
4558
   * @return string <p>The string with its name parts capitalized.</p>
4559
   */
4560
  public static function str_capitalize_name(string $str): string
  {
    // Collapse whitespace, then capitalize the space-separated parts
    // and afterwards the hyphen-separated parts of the result.
    $collapsed = self::collapse_whitespace($str);
    $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
4569
4570
  /**
4571
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
4572
   *
4573
   * @param string $names
4574
   * @param string $delimiter
4575
   * @param string $encoding
4576
   *
4577
   * @return string
4578
   */
4579
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // Splits $names on $delimiter and upper-cases the first character of every
    // part, except for parts that are (or start with) one of the special cases.
    $specialCases = [
        // parts that are left untouched when they match exactly
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        // parts starting with one of these are left untouched
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // For the '-' delimiter the "names" list is additionally used as a prefix list.
    // The candidate list does not depend on the current part, so build it once.
    $beginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $beginnings = \array_merge($specialCases['names'], $beginnings);
    }

    $parts = \explode($delimiter, $names);

    // Iterate by key instead of by reference, so no dangling reference is left behind.
    foreach ($parts as $index => $part) {
      if (\in_array($part, $specialCases['names'], true)) {
        continue;
      }

      $keepAsIs = false;
      foreach ($beginnings as $beginning) {
        if (self::strpos($part, $beginning, 0, $encoding) === 0) {
          $keepAsIs = true;

          // one matching prefix is enough
          break;
        }
      }

      if ($keepAsIs) {
        continue;
      }

      $parts[$index] = self::str_upper_first($part);
    }

    return \implode($delimiter, $parts);
  }
4654
4655
  /**
4656
   * Returns true if the string contains $needle, false otherwise. By default
4657
   * the comparison is case-sensitive, but can be made insensitive by setting
4658
   * $caseSensitive to false.
4659
   *
4660
   * @param string $haystack      <p>The input string.</p>
4661
   * @param string $needle        <p>Substring to look for.</p>
4662
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4663
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4664
   *
4665
   * @return bool <p>Whether or not $haystack contains $needle.</p>
4666
   */
4667 106
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // Pick the case-sensitive or the case-insensitive lookup;
    // every position (including 0) counts as "found".
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4675
4676
  /**
4677
   * Returns true if the string contains all $needles, false otherwise. By
4678
   * default the comparison is case-sensitive, but can be made insensitive by
4679
   * setting $caseSensitive to false.
4680
   *
4681
   * @param string $haystack      <p>The input string.</p>
4682
   * @param array  $needles       <p>SubStrings to look for.</p>
4683
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4684
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4685
   *
4686
   * @return bool <p>Whether or not $haystack contains $needle.</p>
4687
   */
4688 44
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // An empty haystack or an empty needle list is reported as "not contained".
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    // The first missing needle decides the result.
    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
4706
4707
  /**
4708
   * Returns true if the string contains any $needles, false otherwise. By
4709
   * default the comparison is case-sensitive, but can be made insensitive by
4710
   * setting $caseSensitive to false.
4711
   *
4712
   * @param string $haystack      <p>The input string.</p>
0 ignored issues
show
Documentation Bug introduced by
The doc comment <p>The at position 0 could not be parsed: Unknown type name '<' at position 0 in <p>The.
Loading history...
4713
   * @param array  $needles       <p>SubStrings to look for.</p>
4714
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4715
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4716
   *
4717
   * @return bool <p>Whether or not $haystack contains any of the $needles.</p>
4718
   */
4719 43
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // Without needles there is nothing that could be contained.
    if (empty($needles)) {
      return false;
    }

    // The first needle that is found decides the result.
    $found = false;
    foreach ($needles as $candidate) {
      if (self::str_contains($haystack, $candidate, $caseSensitive, $encoding)) {
        $found = true;

        break;
      }
    }

    return $found;
  }
4733
4734
  /**
4735
   * Returns a lowercase and trimmed string separated by dashes. Dashes are
4736
   * inserted before uppercase characters (with the exception of the first
4737
   * character of the string), and in place of spaces as well as underscores.
4738
   *
4739
   * @param string $str      <p>The input string.</p>
4740
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
4741
   *
4742
   * @return string
4743
   */
4744 19
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    // Dasherizing is delimiting with a dash.
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
4748
4749
  /**
4750
   * Returns a lowercase and trimmed string separated by the given delimiter.
4751
   * Delimiters are inserted before uppercase characters (with the exception
4752
   * of the first character of the string), and in place of spaces, dashes,
4753
   * and underscores. Alpha delimiters are not converted to lowercase.
4754
   *
4755
   * @param string $str       <p>The input string.</p>
4756
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
4757
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
4758
   *
4759
   * @return string
4760
   */
4761 49
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // Mark every ASCII upper-case letter that is not at a word boundary with a dash.
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    // Lower-case before the delimiter is inserted, so the delimiter keeps its case.
    $str = self::strtolower($str, $encoding);

    // The delimiter is user input, not a replacement template: escape "\" and "$"
    // so sequences such as "$1" or "\1" are inserted literally instead of being
    // interpreted as back-references by preg_replace().
    $literalDelimiter = \addcslashes($delimiter, '\\$');

    // Replace every run of dashes, underscores and whitespace with the delimiter.
    return (string)\preg_replace('/[-_\s]+/u', $literalDelimiter, $str);
  }
4771
4772
  /**
4773
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4774
   *
4775
   * @param string $str <p>The input string.</p>
4776
   *
4777
   * @return false|string <p>
4778
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
4779
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
4780
   *                      </p>
4781
   */
4782 15
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // run each check only once; 1 is mapped to little endian, 2 to big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // only cache an array, so the loop below always gets something iterable
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // iconv() returns false on failure; handing that to md5() would throw a
      // TypeError under strict_types. Comparing the bytes directly covers the
      // failure case and is equivalent to comparing the two md5 hashes.
      if ($converted === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
4890
4891
  /**
4892
   * Check if the string ends with the given substring.
4893
   *
4894
   * @param string $haystack <p>The string to search in.</p>
4895
   * @param string $needle   <p>The substring to search for.</p>
4896
   *
4897
   * @return bool
4898
   */
4899 38
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    // Empty input never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Byte-wise comparison of the haystack's tail with the needle.
    $needleLength = \strlen($needle);
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
4907
4908
  /**
4909
   * Returns true if the string ends with any of $substrings, false otherwise.
4910
   *
4911
   * - case-sensitive
4912
   *
4913
   * @param string   $str        <p>The input string.</p>
4914
   * @param string[] $substrings <p>Substrings to look for.</p>
4915
   *
4916
   * @return bool     <p>Whether or not $str ends with $substring.</p>
4917
   */
4918 7
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // Nothing to look for.
    if (empty($substrings)) {
      return false;
    }

    // Case-sensitive: the first matching suffix decides the result.
    $matched = false;
    foreach ($substrings as $suffix) {
      if (self::str_ends_with($str, $suffix)) {
        $matched = true;

        break;
      }
    }

    return $matched;
  }
4932
4933
  /**
4934
   * Ensures that the string begins with $substring. If it doesn't, it's
4935
   * prepended.
4936
   *
4937
   * @param string $str       <p>The input string.</p>
4938
   * @param string $substring <p>The substring to add if not present.</p>
4939
   *
4940
   * @return string
4941
   */
4942 10
  public static function str_ensure_left(string $str, string $substring): string
  {
    // Already prefixed: hand the input back untouched.
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
4950
4951
  /**
4952
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
4953
   *
4954
   * @param string $str       <p>The input string.</p>
4955
   * @param string $substring <p>The substring to add if not present.</p>
4956
   *
4957
   * @return string
4958
   */
4959 10
  public static function str_ensure_right(string $str, string $substring): string
  {
    // Already suffixed: hand the input back untouched.
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
4967
4968
  /**
4969
   * Capitalizes the first word of the string, replaces underscores with
4970
   * spaces, and strips '_id'.
4971
   *
4972
   * @param string $str
4973
   *
4974
   * @return string
4975
   */
4976 3
  public static function str_humanize($str): string
  {
    // '_id' is removed first, the remaining underscores become spaces.
    $search = ['_id', '_'];
    $replacement = ['', ' '];

    $humanized = self::str_replace($search, $replacement, $str);

    // Trim the result and upper-case its first character.
    return self::ucfirst(self::trim($humanized));
  }
4992
4993
  /**
4994
   * Check if the string ends with the given substring, case insensitive.
4995
   *
4996
   * @param string $haystack <p>The string to search in.</p>
4997
   * @param string $needle   <p>The substring to search for.</p>
4998
   *
4999
   * @return bool
5000
   */
5001 10
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    // Empty input never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Cut off as many bytes as the needle has and compare them case-insensitively.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5013
5014
  /**
5015
   * Returns true if the string ends with any of $substrings, false otherwise.
5016
   *
5017
   * - case-insensitive
5018
   *
5019
   * @param string   $str        <p>The input string.</p>
5020
   * @param string[] $substrings <p>Substrings to look for.</p>
5021
   *
5022
   * @return bool     <p>Whether or not $str ends with $substring.</p>
5023
   */
5024 4
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // Nothing to look for.
    if (empty($substrings)) {
      return false;
    }

    // Case-insensitive: the first matching suffix decides the result.
    $matched = false;
    foreach ($substrings as $suffix) {
      if (self::str_iends_with($str, $suffix)) {
        $matched = true;

        break;
      }
    }

    return $matched;
  }
5038
5039
  /**
5040
   * Returns the index of the first occurrence of $needle in the string,
5041
   * and false if not found. Accepts an optional offset from which to begin
5042
   * the search.
5043
   *
5044
   * @param string $str      <p>The input string.</p>
5045
   * @param string $needle   <p>Substring to look for.</p>
5046
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5047
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5048
   *
5049
   * @return int|false <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5050
   */
5051 2
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    // Thin wrapper: delegates to the case-insensitive position lookup.
    $position = self::stripos($str, $needle, $offset, $encoding);

    return $position;
  }
5060
5061
  /**
5062
   * Returns the index of the last occurrence of $needle in the string,
5063
   * and false if not found. Accepts an optional offset from which to begin
5064
   * the search. Offsets may be negative to count from the last character
5065
   * in the string.
5066
   *
5067
   * @param string $str      <p>The input string.</p>
5068
   * @param string $needle   <p>Substring to look for.</p>
5069
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5070
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5071
   *
5072
   * @return int|false <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5073
   */
5074 2
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    // Thin wrapper: delegates to the case-insensitive reverse position lookup.
    $position = self::strripos($str, $needle, $offset, $encoding);

    return $position;
  }
5083
5084
  /**
5085
   * Returns the index of the first occurrence of $needle in the string,
5086
   * and false if not found. Accepts an optional offset from which to begin
5087
   * the search.
5088
   *
5089
   * @param string $str      <p>The input string.</p>
5090
   * @param string $needle   <p>Substring to look for.</p>
5091
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5092
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5093
   *
5094
   * @return int|false <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5095
   */
5096 12
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    // Thin wrapper: delegates to the case-sensitive position lookup.
    $position = self::strpos($str, $needle, $offset, $encoding);

    return $position;
  }
5105
5106
  /**
5107
   * Returns the index of the last occurrence of $needle in the string,
5108
   * and false if not found. Accepts an optional offset from which to begin
5109
   * the search. Offsets may be negative to count from the last character
5110
   * in the string.
5111
   *
5112
   * @param string $str      <p>The input string.</p>
5113
   * @param string $needle   <p>Substring to look for.</p>
5114
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5115
   * @param string $encoding [optional] <p>Default: UTF-8</p>
5116
   *
5117
   * @return int|false <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5118
   */
5119 12
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    // Thin wrapper: delegates to the case-sensitive reverse position lookup.
    $position = self::strrpos($str, $needle, $offset, $encoding);

    return $position;
  }
5128
5129
  /**
5130
   * Inserts $substring into the string at the $index provided.
5131
   *
5132
   * @param string $str       <p>The input string.</p>
5133
   * @param string $substring <p>String to be inserted.</p>
5134
   * @param int    $index     <p>The index at which to insert the substring.</p>
5135
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5136
   *
5137
   * @return string
5138
   */
5139 8
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $length = self::strlen($str, $encoding);

    // An index beyond the end of the string leaves the input unchanged.
    if ($index > $length) {
      return $str;
    }

    // Split the input at $index and put the substring between both halves.
    $head = self::substr($str, 0, $index, $encoding);
    $tail = self::substr($str, $index, $length, $encoding);

    return $head . $substring . $tail;
  }
5152
5153
  /**
5154
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5155
   *
5156
   * @link  http://php.net/manual/en/function.str-ireplace.php
5157
   *
5158
   * @param mixed $search  <p>
5159
   *                       Every replacement with search array is
5160
   *                       performed on the result of previous replacement.
5161
   *                       </p>
5162
   * @param mixed $replace <p>
5163
   *                       </p>
5164
   * @param mixed $subject <p>
5165
   *                       If subject is an array, then the search and
5166
   *                       replace is performed with every entry of
5167
   *                       subject, and the return value is an array as
5168
   *                       well.
5169
   *                       </p>
5170
   * @param int   $count   [optional] <p>
5171
   *                       The number of matched and replaced needles will
5172
   *                       be returned in count which is passed by
5173
   *                       reference.
5174
   *                       </p>
5175
   *
5176
   * @return mixed <p>A string or an array of replacements.</p>
5177
   */
5178 40
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a case-insensitive UTF-8 pattern. The patterns are
    // collected in a new array, so no by-reference loop variable is left behind.
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // an empty needle must never match: this pattern cannot be satisfied
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // Replacements are literal text (as with str_replace): escape "\" and "$" so
    // preg_replace() does not interpret "$1", "\1", ... as back-references.
    $toLiteral = static function ($replacement): string {
      return \addcslashes((string)$replacement, '\\$');
    };
    $replacement = \is_array($replace) ? \array_map($toLiteral, $replace) : $toLiteral($replace);

    // $count is filled directly by preg_replace().
    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5196
5197
  /**
5198
   * Check if the string starts with the given substring, case insensitive.
5199
   *
5200
   * @param string $haystack <p>The string to search in.</p>
5201
   * @param string $needle   <p>The substring to search for.</p>
5202
   *
5203
   * @return bool
5204
   */
5205 10
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    // Empty input never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // The needle is a prefix when its case-insensitive position is exactly 0.
    return self::stripos($haystack, $needle) === 0;
  }
5217
5218
  /**
5219
   * Returns true if the string begins with any of $substrings, false otherwise.
5220
   *
5221
   * - case-insensitive
5222
   *
5223
   * @param string $str        <p>The input string.</p>
5224
   * @param array  $substrings <p>Substrings to look for.</p>
5225
   *
5226
   * @return bool <p>Whether or not $str starts with $substring.</p>
5227
   */
5228 4
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    // An empty string or an empty prefix list can not produce a match.
    if ($str === '' || empty($substrings)) {
      return false;
    }

    // Case-insensitive: the first matching prefix decides the result.
    $matched = false;
    foreach ($substrings as $prefix) {
      if (self::str_istarts_with($str, $prefix)) {
        $matched = true;

        break;
      }
    }

    return $matched;
  }
5246
5247
  /**
5248
   * Gets the substring after the first occurrence of a separator.
5249
   *
5250
   * @param string $str       <p>The input string.</p>
5251
   * @param string $separator <p>The string separator.</p>
5252
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5253
   *
5254
   * @return string
5255
   */
5256 1
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // Nothing can follow a separator when either input is empty.
    if ($separator === '' || $str === '') {
      return '';
    }

    // Case-insensitive lookup; the encoding is forwarded so the offset is
    // measured in the same encoding as the separator length below.
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // substr() may report false; the cast keeps the declared string return
    // type intact under strict_types.
    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5278
5279
  /**
5280
   * Gets the substring after the last occurrence of a separator.
5281
   *
5282
   * @param string $str       <p>The input string.</p>
5283
   * @param string $separator <p>The string separator.</p>
5284
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5285
   *
5286
   * @return string
5287
   */
5288 1
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // Nothing can follow a separator when either input is empty.
    if ($separator === '' || $str === '') {
      return '';
    }

    // Case-insensitive reverse lookup; the encoding is forwarded so the offset
    // is measured in the same encoding as the separator length below.
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // substr() may report false; the cast keeps the declared string return
    // type intact under strict_types.
    return (string)self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5310
5311
  /**
5312
   * Gets the substring before the first occurrence of a separator.
5313
   *
5314
   * @param string $str       <p>The input string.</p>
5315
   * @param string $separator <p>The string separator.</p>
5316
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5317
   *
5318
   * @return string
5319
   */
5320 1
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // Nothing can precede a separator when either input is empty.
    if ($separator === '' || $str === '') {
      return '';
    }

    // Case-insensitive lookup; the encoding is forwarded so the offset is
    // measured in the same encoding that substr() uses below.
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // substr() may report false; the cast keeps the declared string return
    // type intact under strict_types.
    return (string)self::substr($str, 0, $offset, $encoding);
  }
5342
5343
  /**
5344
   * Gets the substring before the last occurrence of a separator.
5345
   *
5346
   * @param string $str       <p>The input string.</p>
5347
   * @param string $separator <p>The string separator.</p>
5348
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5349
   *
5350
   * @return string
5351
   */
5352 1
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // Nothing can precede a separator when either input is empty.
    if ($separator === '' || $str === '') {
      return '';
    }

    // Case-insensitive reverse lookup; the encoding is forwarded so the offset
    // is measured in the same encoding that substr() uses below.
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    // substr() may report false; the cast keeps the declared string return
    // type intact under strict_types.
    return (string)self::substr($str, 0, $offset, $encoding);
  }
5374
5375
  /**
5376
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
5377
   *
5378
   * @param string $str          <p>The input string.</p>
5379
   * @param string $needle       <p>The string to look for.</p>
5380
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5381
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
5382
   *
5383
   * @return string
5384
   */
5385 2
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Empty input yields an empty result.
    if ($needle === '' || $str === '') {
      return '';
    }

    // Case-insensitive search for the first occurrence of the needle.
    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    // "not found" is reported as an empty string instead of false
    return ($found === false) ? '' : $found;
  }
5407
5408
  /**
5409
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
5410
   *
5411
   * @param string $str          <p>The input string.</p>
5412
   * @param string $needle       <p>The string to look for.</p>
5413
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5414
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
5415
   *
5416
   * @return string
5417
   */
5418 1
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Empty input yields an empty result.
    if ($needle === '' || $str === '') {
      return '';
    }

    // Case-insensitive search for the last occurrence of the needle.
    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    // "not found" is reported as an empty string instead of false
    return ($found === false) ? '' : $found;
  }
5435
5436
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The trailing characters, or an empty string if $n is not positive.</p>
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // self::substr() may return false; cast so the declared "string" return type
    // is honoured under strict_types instead of raising a TypeError.
    return (string)self::substr($str, -$n, null, $encoding);
  }
5453
5454
5455
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than $length, it is cut so that the kept part plus
   * the add-on is $length characters long. When the add-on alone is already as
   * long as (or longer than) $length, only the add-on is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The (possibly shortened) string; empty if $str is empty or $length is not positive.</p>
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the string itself, and never
    // pass a negative length to substr(): that would cut from the end of the
    // string instead of limiting it.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5481
5482
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * The string is cut at $length characters and then shortened back to the
   * last space inside that cut, so that no word is split when a space exists.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The (possibly shortened) string; empty if $str is empty or $length is not positive.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The cut position falls exactly on a space: drop that space and append the add-on.
    if (' ' === self::substr($str, $length - 1, 1, $encoding)) {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    $cut = (string)self::substr($str, 0, $length, $encoding);

    // Everything in front of the last space of the cut, i.e. only whole words.
    $wholeWords = \implode(' ', \array_slice(\explode(' ', $cut), 0, -1));

    if ('' !== $wholeWords) {
      return $wholeWords . $strAddOn;
    }

    // No usable word boundary: hard-cut one character short of the limit.
    return self::substr($cut, 0, $length - 1, $encoding) . $strAddOn;
  }
5523
5524
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The shared leading characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; ++$i) {
      $char = self::substr($str, $i, 1, $encoding);

      // Strict comparison: characters must be identical, and a failed
      // extraction (false) never counts as a match.
      if ($char === false || $char !== self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
5550
5551
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if either input is empty.</p>
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per table cell.
    $otherChars = [];
    for ($j = 0; $j < $otherLength; ++$j) {
      $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // index (exclusive) in $str where the best match ends

    // Each row only depends on the row before it, so two rows are enough.
    $previousRow = \array_fill(0, $otherLength + 1, 0);

    for ($i = 1; $i <= $strLength; ++$i) {
      $strChar = self::substr($str, $i - 1, 1, $encoding);
      $currentRow = [0];

      for ($j = 1; $j <= $otherLength; ++$j) {
        if ($strChar === $otherChars[$j - 1]) {
          $currentRow[$j] = $previousRow[$j - 1] + 1;

          // Strictly greater: on ties the first occurrence wins.
          if ($currentRow[$j] > $len) {
            $len = $currentRow[$j];
            $end = $i;
          }
        } else {
          $currentRow[$j] = 0;
        }
      }

      $previousRow = $currentRow;
    }

    return (string)self::substr($str, $end - $len, $len, $encoding);
  }
5605
5606
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The shared trailing characters, or an empty string if there are none.</p>
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $maxLength = (int)\min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; ++$i) {
      // Walk both strings backwards, one character at a time.
      $char = self::substr($str, -$i, 1, $encoding);

      // Strict comparison: characters must be identical, and a failed
      // extraction (false) never counts as a match.
      if ($char === false || $char !== self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
5632
5633
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u"
   * (UTF-8) modifier; it is inserted as given, without quoting.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool <p>Whether or not $str matches the pattern.</p>
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match; 0 (no match) and false (error) both map to false.
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
5649
5650
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not the index exists.</p>
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);

    // Non-negative offsets are zero-based, negative ones count back from the end.
    return $offset >= 0
        ? $charCount > $offset
        : $charCount >= \abs($offset);
  }
5673
5674
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The character at the specified index.</p>
   *
   * @throws \OutOfBoundsException <p>If the positive or negative offset does not exist.</p>
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding, so the bounds check agrees
    // with the character lookup below (and with str_offset_exists()).
    $length = self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
5703
5704
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * An empty input string is returned unchanged (as an empty string), and an
   * empty $pad_string leaves the input untouched because there is nothing to
   * pad with.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string <strong>Returns the padded string</strong>
   *
   * @throws \InvalidArgumentException <p>If $pad_type is neither an integer nor one of the known names.</p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Translate the symbolic names into the native STR_PAD_* constants.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } else if ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } else if ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = self::strlen($str, $encoding);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = (int)self::strlen($pad_string, $encoding);

      // Without pad characters there is nothing to add; this also avoids the
      // division by zero in the repeat-count calculations below.
      if ($ps_length < 1) {
        return $str;
      }

      $diff = ($pad_length - $str_length);

      // In every branch the pad string is repeated often enough to cover the
      // gap and then trimmed to the exact number of characters needed.
      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff, $encoding);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // An odd gap puts the extra character on the right-hand side.
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff, $encoding);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
5775
5776
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. Alias for pad() with a $padType of 'both'.
   *
   * When the missing length is odd, the right side receives the extra character.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with padding applied.</p>
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
5793
5794
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Alias for pad() with a $padType of 'left'.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with left padding.</p>
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding (as str_pad_both() does),
    // so the amount of padding is right for non-default encodings too.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
5809
5810
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Alias for pad() with a $padType of 'right'.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String with right padding.</p>
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding (as str_pad_both() does),
    // so the amount of padding is right for non-default encodings too.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
5825
5826
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
5850
5851
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Plain pass-through; $count is filled by reference by the native function.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
5884
5885
  /**
   * Replaces $search with $replacement where it occurs at the beginning of the string.
   *
   * The search term is regex-quoted and anchored with "^"; backslashes in the
   * replacement are doubled before it is handed to self::regex_replace().
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = '^' . \preg_quote($search, '/');
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
5902
5903
  /**
   * Replaces $search with $replacement where it occurs at the ending of the string.
   *
   * The search term is regex-quoted and anchored with "$"; backslashes in the
   * replacement are doubled before it is handed to self::regex_replace().
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $anchoredPattern = \preg_quote($search, '/') . '$';
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $anchoredPattern, $escapedReplacement);
  }
5920
5921
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that replaces the first match.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the first match replaced, or the unchanged subject if nothing matched.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstPos = self::strpos($subject, $search);

    // No match: hand the subject back untouched.
    if ($firstPos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
5940
5941
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text that replaces the last match.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the last match replaced, or the unchanged subject if nothing matched.</p>
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $lastPos = self::strrpos($subject, $search);

    // No match: hand the subject back untouched.
    if ($lastPos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $lastPos, self::strlen($search));
  }
5960
5961
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    // Without this guard range(0, -1) would yield the two bogus indexes 0 and -1.
    if ('' === $str) {
      return '';
    }

    // Shuffle the character positions, then rebuild the string in that order.
    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= (string)self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
5983
5984
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string   $str
   * @param int      $start    <p>Initial index from which to begin extraction.</p>
   * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string   $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The extracted substring.</p>
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8'): string
  {
    // All lengths are measured in the requested encoding, matching the
    // substr() call at the end.
    if ($end === null) {
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      // A negative end counts back from the end of the string.
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    // self::substr() may return false; cast to honour the "string" return type.
    return (string)self::substr($str, $start, $length, $encoding);
  }
6011
6012
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become underscores, every ASCII uppercase letter is
   * lower-cased and prefixed with an underscore, every digit is wrapped in
   * underscores, and finally repeated / leading / trailing underscores are removed.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String in snake_case.</p>
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // Digits and ASCII uppercase letters only. (A "|" inside a character
        // class is a literal pipe, not an alternation, so it must not be listed.)
        '/([\dA-Z])/u',
        function ($matches) use ($encoding) {
          $match = $matches[1];

          // A digit survives the int round-trip unchanged; a letter does not.
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    // The patterns are applied in order. Once every whitespace run has become
    // "_", no whitespace is left to trim; the final trim() calls take care of
    // the edges.
    $str = (string)\preg_replace(
        [
            '/\s+/', // convert spaces to "_"
            '/_+/',  // remove double "_"
        ],
        [
            '_',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6059
6060
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice collapses duplicate values.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6085
6086
  /**
   * Split a string into an array.
   *
   * The string is split with the GRAPHEME_CLUSTER_RX pattern and the matches
   * are grouped into chunks of $len. An array input is processed entry by
   * entry, keeping its keys.
   *
   * @param string|string[] $str <p>The string (or array of strings) to split.</p>
   * @param int             $len <p>Number of matched characters per chunk. Default: 1</p>
   *
   * @return string[] <p>The chunks; an empty array for an empty string.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      $splitEntries = $str;
      foreach ($splitEntries as $key => $entry) {
        $splitEntries[$key] = self::str_split($entry, $len);
      }

      return $splitEntries;
    }

    if ('' === $str) {
      return [];
    }

    // Invalid chunk sizes are left to the native function to report.
    if ($len < 1) {
      return \str_split($str, $len);
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $chars = $matches[0];

    if ($len === 1) {
      return $chars;
    }

    // Group consecutive characters into chunks of $len and glue each chunk together.
    return \array_map(
        static function (array $chunk): string {
          return \implode('', $chunk);
        },
        \array_chunk($chars, $len)
    );
  }
6134
6135
  /**
   * Splits the string at every occurrence of $pattern and returns the parts.
   * An optional integer $limit will truncate the results.
   *
   * The pattern is passed through preg_quote(), so it is matched literally
   * and not interpreted as a regular expression.
   *
   * @param string $str
   * @param string $pattern <p>The separator at which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] <p>An array of strings; empty if $limit is 0 or the split fails.</p>
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // this->split errors when supplied an empty pattern in < PHP 5.4.13
    // and current versions of HHVM (3.8 and below)
    if ($pattern === '') {
      return [$str];
    }

    // this->split returns the remaining unsplit string in the last index when
    // supplying a limit
    if ($limit > 0) {
      ++$limit;
    } else {
      $limit = -1;
    }

    $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

    // preg_split() returns false on failure (e.g. malformed UTF-8 with the "u"
    // modifier); that must not reach count() or the "array" return type.
    if ($array === false) {
      return [];
    }

    // Drop the "remaining unsplit string" entry caused by the limit increment above.
    if ($limit > 0 && \count($array) === $limit) {
      \array_pop($array);
    }

    return $array;
  }
6174
6175
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is case-sensitive; an empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Whether or not $haystack begins with $needle.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $needle || '' === $haystack) {
      return false;
    }

    // Compare only the leading bytes that the needle covers.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6195
6196
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with $substring.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing to check against, or nothing to check.
    if ('' === $str || $substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6224
6225
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, when the
   * separator is not found, or when the substring cannot be extracted.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // NOTE(review): $encoding is not forwarded to str_index_first() here - confirm whether it should be.
    $offset = self::str_index_first($str, $separator);
    if ($offset === false) {
      return '';
    }

    // Start right behind the separator and take everything up to the end.
    $result = self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );

    // self::substr() may signal failure with false, which must not leak through the string return type.
    return $result === false ? '' : $result;
  }
6256
6257
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, when the
   * separator is not found, or when the substring cannot be extracted.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // NOTE(review): $encoding is not forwarded to str_index_last() here - confirm whether it should be.
    $offset = self::str_index_last($str, $separator);
    if ($offset === false) {
      return '';
    }

    // Start right behind the last separator and take everything up to the end.
    $result = self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );

    // self::substr() may signal failure with false, which must not leak through the string return type.
    return $result === false ? '' : $result;
  }
6288
6289
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, when the
   * separator is not found, or when the substring cannot be extracted.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // NOTE(review): $encoding is not forwarded to str_index_first() here - confirm whether it should be.
    $offset = self::str_index_first($str, $separator);
    if ($offset === false) {
      return '';
    }

    // Everything from the start of the string up to (excluding) the separator.
    $result = self::substr(
        $str,
        0,
        $offset,
        $encoding
    );

    // self::substr() may signal failure with false, which must not leak through the string return type.
    return $result === false ? '' : $result;
  }
6320
6321
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * Returns an empty string when $str or $separator is empty, when the
   * separator is not found, or when the substring cannot be extracted.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // NOTE(review): $encoding is not forwarded to str_index_last() here - confirm whether it should be.
    $offset = self::str_index_last($str, $separator);
    if ($offset === false) {
      return '';
    }

    // Everything from the start of the string up to (excluding) the last separator.
    $result = self::substr(
        $str,
        0,
        $offset,
        $encoding
    );

    // self::substr() may signal failure with false, which must not leak through the string return type.
    return $result === false ? '' : $result;
  }
6352
6353
  /**
   * Gets the part of the string starting at (or, with "$beforeNeedle", ending before)
   * the first occurrence of the "$needle", as reported by UTF8::strstr().
   *
   * An empty string is returned when $str or $needle is empty or when
   * UTF8::strstr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // Map the "not found" marker (false) onto an empty string.
    return $found === false ? '' : $found;
  }
6385
6386
  /**
   * Gets the part of the string starting at (or, with "$beforeNeedle", ending before)
   * the last occurrence of the "$needle", as reported by UTF8::strrchr().
   *
   * An empty string is returned when $str or $needle is empty or when
   * UTF8::strrchr() reports false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // Map the "not found" marker (false) onto an empty string.
    return $found === false ? '' : $found;
  }
6413
6414
  /**
   * Wraps $str in the given substring (added once in front and once behind).
   *
   * @param string $str       <p>The string to wrap.</p>
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string <p>String with the substring both prepended and appended.</p>
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6426
6427
  /**
   * Returns a trimmed string with the first letter of each word capitalized.
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Every run of non-whitespace characters counts as a word. Words listed in
   * $ignore (strict, case-sensitive comparison) are kept untouched; all other
   * words are lower-cased and then get an upper-case first character.
   *
   * @param string              $str
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The titleized string.</p>
   */
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    $str = (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          // Leave explicitly ignored words exactly as they were written.
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // Use the same encoding for both steps, so that a non-default
          // $encoding is honoured when upper-casing the first character too.
          return self::str_upper_first(
              self::strtolower($match[0], $encoding),
              $encoding
          );
        },
        $str
    );

    return $str;
  }
6456
6457
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * Processing steps:
   * 1. trim the input and lower-case it completely if it contains no lower-case character
   * 2. capitalize ordinary words, lower-case "small words", keep paths / URLs / e-mails
   *    and words with internal capitals untouched
   * 3. re-capitalize small words at the start / end of the title or of a sub-phrase
   * 4. re-capitalize small words in hyphenated compounds ("in-flight", "stand-in")
   *
   * INFO: the entries of $ignore are appended to the built-in small-word list and
   * inserted into the regular expressions without escaping.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>The titleized string.</p>
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // Built-in small words (regex fragments), extended by the caller's list.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // An input without any lower-case character (e.g. "ALL CAPS") is lower-cased
    // first; the requested encoding is used here like in every other step.
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $word = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $word .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $word .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $word .= self::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $word .= $matches[5];
          }
          // Preserve trailing underscore
          $word .= $matches[6];

          return $word;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in this pattern tests for "…" although its inline
    // comment talks about a hyphen - confirm the intent before changing it.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
6601
6602
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number which is written
   * in base 2 without leading zeros, e.g. "a" (0x61) => "1100001". An empty
   * string or a string consisting only of NUL bytes yields "0".
   *
   * The conversion works digit by digit, so it stays exact for strings of any
   * length (a numeric base conversion loses precision on large numbers).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    // Every hexadecimal digit maps onto exactly four bits.
    $bits = \strtr(
        \bin2hex($str),
        [
            '0' => '0000',
            '1' => '0001',
            '2' => '0010',
            '3' => '0011',
            '4' => '0100',
            '5' => '0101',
            '6' => '0110',
            '7' => '0111',
            '8' => '1000',
            '9' => '1001',
            'a' => '1010',
            'b' => '1011',
            'c' => '1100',
            'd' => '1101',
            'e' => '1110',
            'f' => '1111',
        ]
    );

    // Drop leading zeros like a numeric conversion would do, but keep a single "0" for a zero value.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
6615
6616
  /**
   * Split a string into an array of lines.
   *
   * A run of one or two consecutive "\r" / "\n" characters is treated as one
   * line separator.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      if ($removeEmptyValues === true) {
        return [];
      }

      return [''];
    }

    $return = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($return === false) {
      // preg_split() reports an error (e.g. invalid UTF-8 input in "u" mode) with false.
      // The pattern is pure ASCII, so a byte-wise split finds the same separators.
      $return = \preg_split("/[\r\n]{1,2}/", $str);
      if ($return === false) {
        $return = [$str];
      }
    }

    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $return;
    }

    $tmpReturn = self::reduce_string_array(
        $return,
        $removeEmptyValues,
        $removeShortValues
    );

    return $tmpReturn;
  }
6651
6652
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains
   * the words as well as the text between them.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      if ($removeEmptyValues === true) {
        return [];
      }

      return [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($return === false) {
      // preg_split() reports an error (e.g. invalid UTF-8 input) with false, which
      // must not be returned as array: hand the input back as one chunk without any word.
      $return = [$str];
    }

    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $return;
    }

    $tmpReturn = self::reduce_string_array(
        $return,
        $removeEmptyValues,
        $removeShortValues
    );

    return $tmpReturn;
  }
6692
6693
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
6708
6709
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * If $substring alone is longer than a (non-negative) $length, nothing of
   * $str is kept and only $substring is returned.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String after truncating.</p>
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // Need to further trim the string so we can append the substring
    $substringLength = self::strlen($substring, $encoding);
    $keep = $length - $substringLength;

    // A negative value would make substr() cut from the END of the string and
    // return more than requested, so keep nothing when the substring does not fit.
    if ($length >= 0 && $keep < 0) {
      $keep = 0;
    }

    $truncated = self::substr($str, 0, $keep, $encoding);
    if ($truncated === false) {
      $truncated = '';
    }

    return $truncated . $substring;
  }
6742
6743
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * If $substring alone is longer than a (non-negative) $length, the remaining
   * length is treated as 0.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String after truncating.</p>
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // need to further trim the string so we can append the substring
    $substringLength = self::strlen($substring, $encoding);
    $requestedLength = $length;
    $length -= $substringLength;

    // A negative value would make substr() cut from the END of the string and
    // return more than requested, so keep nothing when the substring does not fit.
    if ($requestedLength >= 0 && $length < 0) {
      $length = 0;
    }

    // substr() may report failure with false; the calls below require a string.
    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      $truncated = '';
    }

    // if the last word was truncated
    // (the loose comparison is intentional here: a "not found" false equals a length of 0)
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace != $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      if ($lastPos !== false || $strPosSpace !== false) {
        $truncated = self::substr($truncated, 0, (int)$lastPos, $encoding);
        if ($truncated === false) {
          $truncated = '';
        }
      }
    }

    $str = $truncated . $substring;

    return $str;
  }
6783
6784
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * INFO: delegates to UTF8::str_delimit() with "_" as delimiter
   *
   * @see UTF8::str_delimit()
   *
   * @param string $str
   *
   * @return string <p>The underscored string.</p>
   */
  public static function str_underscored(string $str): string
  {
    $underscored = self::str_delimit($str, '_');

    return $underscored;
  }
6798
6799
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * INFO: the string is camelized via UTF8::str_camelize() first, then its
   * first character is upper-cased via UTF8::str_upper_first()
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String in UpperCamelCase.</p>
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first($camelized, $encoding);
  }
6813
6814
  /**
   * alias for "UTF8::ucfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8);

    return $upperFirst;
  }
6829
6830
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split via UTF8::str_to_words(); in that result the words
   * are taken from the odd indexes (1, 3, 5, ...).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int <p>The number of words in the string</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $total = \count($parts);

    // Every format other than 1 and 2 just counts the words.
    if ($format !== 1 && $format !== 2) {
      return ($total - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      // plain list of words
      for ($index = 1; $index < $total; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    // format 2: words keyed by their offset, starting behind the leading non-word part
    $position = self::strlen($parts[0]);
    for ($index = 1; $index < $total; $index += 2) {
      $words[$position] = $parts[$index];
      // advance by the word itself plus the separator that follows it
      $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
    }

    return $words;
  }
6873
6874
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed
   * through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
6892
6893
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
6910
6911
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are reported as equal right away. Otherwise both strings
   * are normalized to NFD before they are compared byte-wise, so that
   * different representations of the same character compare as equal. If one
   * of the strings can not be normalized, the unmodified strings are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalizer::normalize() reports an error with false, which \strcmp() does
    // not accept in strict mode: fall back to the raw input for BOTH sides.
    if ($normalized1 === false || $normalized2 === false) {
      return \strcmp($str1, $str2);
    }

    return \strcmp($normalized1, $normalized2);
  }
6931
6932
  /**
   * Find length of initial segment not matching mask.
   *
   * If $offset or $length is given, only that part of $str (cut out via
   * UTF8::substr()) is inspected.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null <p>null if $charList is empty, if the part to inspect is empty
   *                  or if it can not be extracted, the length otherwise.</p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // Restrict the search to the requested part of the string.
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }
      $str = (string)$slice;
    }

    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first char from the mask.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // No char of the mask was found: the whole string is the segment.
    return self::strlen($str);
  }
6966
6967
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
6984
6985
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every element of the array is converted via UTF8::chr() and the results
   * are concatenated in the given order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    // Keep the callable form: UTF8::chr() is invoked by array_map() with one argument per element.
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
7007
7008
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The candidates are the keys of self::$BOM; the byte-length values of that map are not needed here.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string starts with one of the known BOMs,
   *              <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    foreach (\array_keys(self::$BOM) as $bom) {
      // only a match at byte position 0 counts as a leading BOM
      if (\strpos($str, $bom) !== 0) {
        continue;
      }

      return true;
    }

    return false;
  }
7025
7026
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning the input via UTF8::clean() first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string      $str            <p>The input string.</p>
   * @param string|null $allowable_tags [optional] <p>
   *                                    Tags which should not be stripped; passed as-is to the native
   *                                    strip_tags() function.
   *                                    </p>
   *                                    <p>
   *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                    can not be changed with allowable_tags.
   *                                    </p>
   * @param bool        $cleanUtf8      [optional] <p>Run the input through UTF8::clean() before stripping.</p>
   *
   * @return string <p>The stripped string; an empty input yields an empty string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    // nothing to strip
    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7058
7059
  /**
   * Remove every run of whitespace from the string.
   *
   * The match uses the POSIX class "[[:space:]]" in unicode mode ("u" modifier),
   * so tabs and newlines are removed as well as plain spaces.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input without whitespace.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // the cast keeps the declared return type even if "preg_replace" does not return a string
    return (string)$stripped;
  }
7076
7077
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * For "UTF-8" with the intl extension available the search is done by "grapheme_stripos()",
   * in every other case by "mb_stripos()".
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found or if haystack / needle is empty.
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the reported position
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if (!$useIntl) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
7125
7126
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * Backends, in order of preference: "mb_stristr()", "grapheme_stristr()" (UTF-8 only), the native
   * "stristr()" (pure ASCII input only) and finally a regular-expression based search.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      or if haystack / needle is empty.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the cleaning above removed all of it.
    // A strict comparison is required: "!$needle" would also be true for the needle "0".
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first (case-insensitive) needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip the captured prefix, the rest starts with the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
7201
7202
  /**
   * Get the string length, not the byte-length!
   *
   * "ASCII" and "CP850" are measured as 8-bit strings. For every other encoding the first backend
   * that delivers a result is used: "iconv_strlen()" / "mb_strlen()", "grapheme_strlen()" (UTF-8 only),
   * the native "strlen()" for pure ASCII input and finally a regular-expression based count.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // 8-bit encodings are handled before (and without) the optional UTF-8 cleaning
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
          return \strlen($str);
        }

        return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      // without mbstring, "iconv" is the only backend that knows other encodings
      if (self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character" in unicode mode
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
7307
7308
  /**
   * Get string length in byte.
   *
   * If self::$SUPPORT reports "mbstring_func_overload", the length is taken from "mb_strlen()" with
   * the 8-bit charset "CP850", otherwise from the native "strlen()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The length of the string in bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850') // 8-BIT
        : \strlen($str);
  }
7325
7326
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared by UTF8::strnatcmp().
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
7342
7343
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings are reported as equal right away; otherwise both strings are passed through
   * UTF8::strtonatfold() and compared with the native "strnatcmp()".
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // shortcut: no folding needed for identical input
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7361
7362
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared by UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
7379
7380
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters via UTF8::substr() and then compared
   * by UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a "false" from UTF8::substr() into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
7400
7401
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found
   *                      or if haystack / char_list is empty.</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // locate the matched text by its byte position and return everything from there on
    $bytePos = \strpos($haystack, $m[0]);

    return \substr($haystack, $bytePos);
  }
7423
7424
  /**
   * Find position of first occurrence of string in a string.
   *
   * "CP850" is searched byte-wise with the native "strpos()". For every other encoding the first
   * backend that delivers a result is used: "iconv_strpos()" / "mb_strpos()", "grapheme_strpos()"
   * (UTF-8 only), the native "strpos()" for pure ASCII input and finally a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // INFO: $needle is declared as string, so there is no integer (code point) needle to convert here

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Resolve a negative offset (counted from the end) to the absolute start position first,
    // otherwise the result would be relative to the shortened haystack instead of the original one.
    if ($offset < 0) {
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    $haystackTmp = $haystackIsAscii ? \substr($haystack, $offset) : self::substr($haystack, $offset);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position inside the shortened haystack ...
    $pos = \strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted back by the skipped characters
    return $offset + self::strlen(\substr($haystackTmp, 0, $pos));
  }
7563
7564
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * The search itself is done by "mb_strrchr()".
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
7600
7601
  /**
   * Reverses characters order in the string.
   *
   * The string is walked from its last to its first character (as counted by UTF8::strlen()),
   * each character is fetched with UTF8::substr().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $result = '';
    for ($index = self::strlen($str) - 1; $index >= 0; --$index) {
      $result .= self::substr($str, $index, 1);
    }

    return $result;
  }
7622
7623
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * The search itself is done by "mb_strrichr()".
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
7658
7659
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Backends, in order of preference: "mb_strripos()", "grapheme_strripos()" (UTF-8 only) and
   * finally UTF8::strrpos() on the upper-cased haystack and needle.
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as string, so there is no integer (code point) needle to convert here

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: make both sides upper-case and use the case-sensitive search

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
7721
7722
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Backends, in order of preference: "mb_strrpos()", "grapheme_strrpos()" (UTF-8 only) and
   * finally a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <strong>null</strong> is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative integer needle is a code point and is converted into its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    // The native functions below expect an integer offset; with "strict_types" the default
    // "null" must not be forwarded to them, so it is normalized to "no offset" here.
    $offset = $offset ?? 0;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // positive offset: skip the leading characters, negative offset: drop the trailing characters
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position inside the (maybe shortened) haystack ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted back by the skipped characters
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
7808
7809
  /**
   * Measures how many leading characters of a string belong to a given set of characters.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The characters that may appear in the leading segment.</p>
   * @param int    $offset [optional] <p>Start position, forwarded to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the examined part, forwarded to UTF8::substr().</p>
   *
   * @return int <p>Length of the matching leading segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // narrow the input down to the requested window first
    if ($length !== null || $offset) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // anchor at the start and consume as many mask characters as possible
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!\preg_match($pattern, $str, $leading)) {
      return 0;
    }

    return self::strlen($leading[0]);
  }
7836
7837
  /**
   * Finds the first occurrence of a needle and returns the haystack from that point to its end,
   * or (on request) the part of the haystack in front of the needle.
   *
   * @param string $haystack       <p>The string to search in. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (the needle itself is excluded).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The sub-string, or <strong>false</strong> if the needle is not found
   *                      or one of the two inputs is an empty string.</p>
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($needle === '' || $haystack === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences in front of the needle would shift the reported position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // second choice: intl, which is only used for UTF-8 input
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first needle via PCRE
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    return $before_needle
        ? $found[1]
        : self::substr($haystack, self::strlen($found[1]));
  }
7904
7905
  /**
   * Applies Unicode case folding so that strings can be matched without regard to case.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally apply the full case folding table (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded string, finally passed through UTF8::strtolower().</p>
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    // lookup tables are built once per request and then reused
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $folded = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
7951
7952
  /**
   * Converts all alphabetic characters of a string to lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to lowercase.</p>
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific lowercasing is delegated to the intl transliterator
    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the language independent transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
8007
8008
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>
   *                The decomposed string without non-spacing marks; an empty string if the input
   *                could not be normalized.
   *                </p>
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if (!\is_string($decomposed)) {
      // Normalizer::normalize() reports failure with a non-string value; forwarding that value
      // would violate the declared string return type, so an empty string is returned instead.
      return '';
    }

    $stripped = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    // preg_replace() returns null on a PCRE error; keep the decomposed string in that case
    return $stripped ?? $decomposed;
  }
8020
8021
  /**
   * Make a string uppercase.
   *
   * Without $lang the conversion is done by mb_strtoupper(). With $lang the intl transliterator
   * "<lang>-Upper" is used; if that id is not available a warning is triggered and "Any-Upper" is
   * used instead. If intl is not available at all, a warning is triggered and mb_strtoupper() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      // the "lang" parameter is ignored without intl; continue with the generic conversion
      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
8076
8077
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters to replace, or (if $to is omitted) a replacement map.</p>
   * @param string|string[] $to   <p>The characters to translate to.</p>
   *
   * @return string <p>
   *                A copy of $str in which each character of $from is translated to the character at
   *                the same position in $to. Surplus characters of the longer argument are ignored.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // identical source and target: nothing would change
    if ($from === $to) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only as many pairs as the shorter side provides
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    // a plain string without a target is removed from $str
    if (\is_string($from)) {
      return \str_replace($from, '', $str);
    }

    return \strtr($str, $from);
  }
8122
8123
  /**
   * Returns the display width of a string as reported by mb_strwidth().
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the width is measured
      $str = self::clean($str);
    }

    // "mb_"-function, possibly provided via polyfill
    return \mb_strwidth($str, $encoding);
  }
8147
8148
  /**
   * Get part of a string.
   *
   * The work is delegated to the first available backend: plain substr() for "CP850",
   * then mbstring, intl (UTF-8 only), iconv (non-negative length only), plain substr() for
   * ASCII-only input and finally a character array based fallback.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $offset    <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p>
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty string: nothing to cut from, or nothing requested
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the character count is only needed if an offset is used or the length is open-ended
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: the offset points exactly behind the last character and no length was given
    // ($length can't be 0 at this point, so only "null" has to be checked)
    if ($length === null && $offset === $str_length) {
      return '';
    }

    // Impossible
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // for ASCII-only input the byte based function is used directly
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
8267
8268
  /**
   * Compares two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is cut to that window first and $str2 is cut to the
   * length of the resulting window before both are compared.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, forwarded to
   *                                     UTF8::substr().</p>
   * @param int|null $length             [optional] <p>The length of the comparison, forwarded to
   *                                     UTF8::substr().</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $hasWindow = ($offset !== 0 || $length !== null);

    if ($hasWindow) {
      $left = self::substr($str1, $offset, $length);
      $str1 = ($left === false) ? '' : (string)$left;

      // the second string is limited to the length of the extracted window
      $right = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($right === false) ? '' : (string)$right;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
8312
8313
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>The maximum length after the specified offset to search for the
   *                           substring.</p>
   * @param string $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if $haystack or $needle is an empty string.</p>
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // restrict the haystack to the requested window before counting
    if ($offset || $length !== null) {

      if ($length === null) {
        $length = self::strlen($haystack);
      }

      $windowCollapses = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
      if (
          $windowCollapses
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences would disturb the matching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the literal matches via PCRE
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return \count($occurrences);
  }
8402
8403
  /**
   * Counts how often $substring occurs in the given string.
   * The comparison is case-sensitive by default; with $caseSensitive = false both
   * strings are uppercased before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>The result of UTF8::substr_count() cast to int.</p>
   */
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($caseSensitive === false) {
      // bring both sides to the same case so that the count ignores it
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
8424
8425
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
8453
8454
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
8482
8483
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
8511
8512
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the other arguments are brought to the same number of entries and the
   * method is applied to every entry.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If offset is positive, the replacing will begin at that offset
   *                                          into the string.
   *                                          <br><br>
   *                                          If offset is negative, the replacing will begin at that character
   *                                          from the end of the string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and positive, it represents the length of the
   *                                          portion of the string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of the string at
   *                                          which to stop replacing. If it is not given for a string input, the
   *                                          length of the string is used. If length is zero, the replacement is
   *                                          inserted at the given offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: one entry per input string
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: entries that are not integers are replaced by 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: null entries become 0, other non-integers become the number of input strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if ($value === null) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, entry by entry
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string only takes the first replacement
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // ASCII-only input can be handled by the byte based native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // otherwise work on arrays of characters
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
8613
8614
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to strip.</p>
   *
   * @return string <p>The haystack without the trailing needle, or the unchanged haystack if it does not end
   *                with the needle.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with the needle
    if ('' === $needle || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $stripped = self::substr($haystack, 0, $keepLength);

    // "UTF8::substr()" may report a failure via false
    return $stripped === false ? '' : (string)$stripped;
  }
8642
8643
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // only "unknown" encoding names are normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: a byte that equals its lowercase form yields the uppercase form and vice versa,
    // bytes without a case-variant stay as they are
    return (string)($lower ^ $upper ^ $str);
  }
8670
8671
  /**
   * Replaces every tab character with $tabLength spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces used per tab.</p>
   *
   * @return string
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
8681
8682
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>String with all characters of $str being title-cased.</p>
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // the two most common names are used as they are, everything else is normalized first
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_convert_case()" used a polyfill if needed ...
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
8700
8701
  /**
   * Alias of "UTF8::to_ascii()": all arguments are forwarded unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
8718
8719
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
8734
8735
  /**
   * Alias of "UTF8::to_latin1()": the argument is forwarded unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
8750
8751
  /**
   * Alias of "UTF8::to_utf8()": the argument is forwarded unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
8766
8767
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned unchanged. Otherwise the string is passed through "UTF8::clean()" and every
   * remaining non-ASCII character is replaced via the lookup tables loaded with "getData()"; a character
   * without a table entry is replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // lookup tables ("banks"), loaded on demand and kept in this static variable for later calls
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the cleaned input in that case
        // and fall through to the table based conversion
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset for every character, so a code point decoded for an earlier character is never re-used
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF never start a sequence, and without a decoded code point there is nothing to look up
      if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points: the high bits select the bank ...
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing bank as empty, so it is not requested again
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // ... and the low byte selects the entry within the bank
      $newchar = $ord & 255;

      $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
    }
    unset($c);

    return \implode('', $chars);
  }
8928
8929
  /**
   * Interprets a value as boolean.
   *
   * The well-known words "true", "1", "on", "yes" / "false", "0", "off", "no" are matched case-insensitively,
   * other numeric strings are true if they are greater than zero, and everything else is true if something is
   * left after "UTF8::trim()".
   *
   * @param mixed $str
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $knownValues = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    $verdict = $knownValues[\strtolower($str)] ?? null;
    if ($verdict !== null) {
      return $verdict;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($str)) {
      return (($str + 0) > 0);
    }

    return (bool)self::trim($str);
  }
8968
8969
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) !== true) {
      $str = (string)$str;

      return '' === $str ? '' : self::utf8_decode($str);
    }

    foreach ($str as $key => $value) {
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
8993
8994
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9007
9008
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $buf = '';

    // the byte at $pos, or NUL if $pos is behind the end of the input
    $byteAt = static function (int $pos) use ($str, $byteCount): string {
      return $pos >= $byteCount ? "\x00" : $str[$pos];
    };

    // true for the bytes 0x80 - 0xBF, which follow the lead byte in a multi-byte UTF-8 sequence
    $isTailByte = static function (string $byte): bool {
      return $byte >= "\x80" && $byte <= "\xBF";
    };

    $i = 0;
    while ($i < $byteCount) {
      $lead = $str[$i];

      if ($lead < "\xC0") {
        // a lone 0x80 - 0xBF byte needs conversion, everything below is kept as it is
        $buf .= (($lead & "\xC0") === "\x80") ? self::to_utf8_convert($lead) : $lead;
        $i++;

        continue;
      }

      // number of tail bytes the lead byte announces (0: does not look like UTF8 at all)
      if ($lead <= "\xDF") {
        $expectedTails = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $expectedTails = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $expectedTails = 3; // looks like 4 bytes UTF8
      } else {
        $expectedTails = 0;
      }

      $sequence = $lead;
      $looksLikeUtf8 = $expectedTails > 0;
      for ($offset = 1; $looksLikeUtf8 && $offset <= $expectedTails; $offset++) {
        $tail = $byteAt($i + $offset);
        $looksLikeUtf8 = $isTailByte($tail);
        $sequence .= $tail;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its tail bytes
        $buf .= $sequence;
        $i += $expectedTails + 1;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are inspected on their own
        $buf .= self::to_utf8_convert($lead);
        $i++;
      }
    }

    // decode unicode escape sequences
    $buf = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
9116
9117
  /**
   * Converts one single byte, which is not part of a valid UTF-8 sequence, into UTF-8.
   *
   * Bytes listed in the WINDOWS-1252 table are replaced by their table entry, every other byte is encoded as
   * the 2-byte UTF-8 sequence of the code point with the same value.
   *
   * @param string $int <p>The raw byte as 1-character string (it is used as key of the "ord"-table and in
   *                    byte-wise string operations).</p>
   *
   * @return string <p>The UTF-8 byte sequence.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // lead byte: 0xC0 plus the two high bits of the byte; the shift keeps the array key an integer
    // (a float key from "/ 64" is truncated implicitly, which is deprecated since PHP 8.1)
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";

    // tail byte: 0x80 plus the six low bits of the byte
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
9150
9151
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped. Without it (or with an empty
   *                      value) the unicode categories "separator" (\pZ) and "other" (\pC) are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid list of characters to strip
    $useDefaultChars = ($chars === INF || (!$chars && $chars !== '0'));

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // the cast keeps e.g. integers working under "strict_types"
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
9180
9181
  /**
   * Makes string's first char uppercase.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything behind the first character stays untouched
    $rest = self::substr($str, 1, null, $encoding);
    if ($rest === false) {
      $rest = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $firstCharUpper . $rest;
  }
9211
9212
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
9227
9228
  /**
   * Uppercase for all words in the string.
   *
   * Other than "mb_convert_case($str, MB_CASE_TITLE)" only the first letter of each word is changed, the
   * remaining letters keep their case.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // strict check: the string "0" is falsy in PHP, but it must not be swallowed
    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // without char-list and exceptions, plain ASCII can be handled by the native function
    $usePhpDefaultFunctions = !(bool)($charlist . \implode('', $exceptions));

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only: a "0" is kept in the result
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
9296
9297
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" escapes into numeric html-entities, so the entity decoding below resolves them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one pass at least; with $multi_decode the passes are repeated until the string does not change anymore
    do {
      $before = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return $str;
  }
9345
9346
  /**
9347
   * Returns an array that maps "urlencoded" WINDOWS-1252 sequences ("%XX") to their UTF-8 characters.
9348
   *
9349
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
9350
   *
9351
   * @return string[]
9352
   */
9353 1
  public static function urldecode_fix_win1252_chars(): array
9354
  {
9355
    return [
9356 1
        '%20' => ' ',
9357
        '%21' => '!',
9358
        '%22' => '"',
9359
        '%23' => '#',
9360
        '%24' => '$',
9361
        '%25' => '%',
9362
        '%26' => '&',
9363
        '%27' => "'",
9364
        '%28' => '(',
9365
        '%29' => ')',
9366
        '%2A' => '*',
9367
        '%2B' => '+',
9368
        '%2C' => ',',
9369
        '%2D' => '-',
9370
        '%2E' => '.',
9371
        '%2F' => '/',
9372
        '%30' => '0',
9373
        '%31' => '1',
9374
        '%32' => '2',
9375
        '%33' => '3',
9376
        '%34' => '4',
9377
        '%35' => '5',
9378
        '%36' => '6',
9379
        '%37' => '7',
9380
        '%38' => '8',
9381
        '%39' => '9',
9382
        '%3A' => ':',
9383
        '%3B' => ';',
9384
        '%3C' => '<',
9385
        '%3D' => '=',
9386
        '%3E' => '>',
9387
        '%3F' => '?',
9388
        '%40' => '@',
9389
        '%41' => 'A',
9390
        '%42' => 'B',
9391
        '%43' => 'C',
9392
        '%44' => 'D',
9393
        '%45' => 'E',
9394
        '%46' => 'F',
9395
        '%47' => 'G',
9396
        '%48' => 'H',
9397
        '%49' => 'I',
9398
        '%4A' => 'J',
9399
        '%4B' => 'K',
9400
        '%4C' => 'L',
9401
        '%4D' => 'M',
9402
        '%4E' => 'N',
9403
        '%4F' => 'O',
9404
        '%50' => 'P',
9405
        '%51' => 'Q',
9406
        '%52' => 'R',
9407
        '%53' => 'S',
9408
        '%54' => 'T',
9409
        '%55' => 'U',
9410
        '%56' => 'V',
9411
        '%57' => 'W',
9412
        '%58' => 'X',
9413
        '%59' => 'Y',
9414
        '%5A' => 'Z',
9415
        '%5B' => '[',
9416
        '%5C' => '\\',
9417
        '%5D' => ']',
9418
        '%5E' => '^',
9419
        '%5F' => '_',
9420
        '%60' => '`',
9421
        '%61' => 'a',
9422
        '%62' => 'b',
9423
        '%63' => 'c',
9424
        '%64' => 'd',
9425
        '%65' => 'e',
9426
        '%66' => 'f',
9427
        '%67' => 'g',
9428
        '%68' => 'h',
9429
        '%69' => 'i',
9430
        '%6A' => 'j',
9431
        '%6B' => 'k',
9432
        '%6C' => 'l',
9433
        '%6D' => 'm',
9434
        '%6E' => 'n',
9435
        '%6F' => 'o',
9436
        '%70' => 'p',
9437
        '%71' => 'q',
9438
        '%72' => 'r',
9439
        '%73' => 's',
9440
        '%74' => 't',
9441
        '%75' => 'u',
9442
        '%76' => 'v',
9443
        '%77' => 'w',
9444
        '%78' => 'x',
9445
        '%79' => 'y',
9446
        '%7A' => 'z',
9447
        '%7B' => '{',
9448
        '%7C' => '|',
9449
        '%7D' => '}',
9450
        '%7E' => '~',
9451
        '%7F' => '',
9452
        '%80' => '`',
9453
        '%81' => '',
9454
        '%82' => '‚',
9455
        '%83' => 'ƒ',
9456
        '%84' => '„',
9457
        '%85' => '…',
9458
        '%86' => '†',
9459
        '%87' => '‡',
9460
        '%88' => 'ˆ',
9461
        '%89' => '‰',
9462
        '%8A' => 'Š',
9463
        '%8B' => '‹',
9464
        '%8C' => 'Œ',
9465
        '%8D' => '',
9466
        '%8E' => 'Ž',
9467
        '%8F' => '',
9468
        '%90' => '',
9469
        '%91' => '‘',
9470
        '%92' => '’',
9471
        '%93' => '“',
9472
        '%94' => '”',
9473
        '%95' => '•',
9474
        '%96' => '–',
9475
        '%97' => '—',
9476
        '%98' => '˜',
9477
        '%99' => '™',
9478
        '%9A' => 'š',
9479
        '%9B' => '›',
9480
        '%9C' => 'œ',
9481
        '%9D' => '',
9482
        '%9E' => 'ž',
9483
        '%9F' => 'Ÿ',
9484
        '%A0' => '',
9485
        '%A1' => '¡',
9486
        '%A2' => '¢',
9487
        '%A3' => '£',
9488
        '%A4' => '¤',
9489
        '%A5' => '¥',
9490
        '%A6' => '¦',
9491
        '%A7' => '§',
9492
        '%A8' => '¨',
9493
        '%A9' => '©',
9494
        '%AA' => 'ª',
9495
        '%AB' => '«',
9496
        '%AC' => '¬',
9497
        '%AD' => '',
9498
        '%AE' => '®',
9499
        '%AF' => '¯',
9500
        '%B0' => '°',
9501
        '%B1' => '±',
9502
        '%B2' => '²',
9503
        '%B3' => '³',
9504
        '%B4' => '´',
9505
        '%B5' => 'µ',
9506
        '%B6' => '¶',
9507
        '%B7' => '·',
9508
        '%B8' => '¸',
9509
        '%B9' => '¹',
9510
        '%BA' => 'º',
9511
        '%BB' => '»',
9512
        '%BC' => '¼',
9513
        '%BD' => '½',
9514
        '%BE' => '¾',
9515
        '%BF' => '¿',
9516
        '%C0' => 'À',
9517
        '%C1' => 'Á',
9518
        '%C2' => 'Â',
9519
        '%C3' => 'Ã',
9520
        '%C4' => 'Ä',
9521
        '%C5' => 'Å',
9522
        '%C6' => 'Æ',
9523
        '%C7' => 'Ç',
9524
        '%C8' => 'È',
9525
        '%C9' => 'É',
9526
        '%CA' => 'Ê',
9527
        '%CB' => 'Ë',
9528
        '%CC' => 'Ì',
9529
        '%CD' => 'Í',
9530
        '%CE' => 'Î',
9531
        '%CF' => 'Ï',
9532
        '%D0' => 'Ð',
9533
        '%D1' => 'Ñ',
9534
        '%D2' => 'Ò',
9535
        '%D3' => 'Ó',
9536
        '%D4' => 'Ô',
9537
        '%D5' => 'Õ',
9538
        '%D6' => 'Ö',
9539
        '%D7' => '×',
9540
        '%D8' => 'Ø',
9541
        '%D9' => 'Ù',
9542
        '%DA' => 'Ú',
9543
        '%DB' => 'Û',
9544
        '%DC' => 'Ü',
9545
        '%DD' => 'Ý',
9546
        '%DE' => 'Þ',
9547
        '%DF' => 'ß',
9548
        '%E0' => 'à',
9549
        '%E1' => 'á',
9550
        '%E2' => 'â',
9551
        '%E3' => 'ã',
9552
        '%E4' => 'ä',
9553
        '%E5' => 'å',
9554
        '%E6' => 'æ',
9555
        '%E7' => 'ç',
9556
        '%E8' => 'è',
9557
        '%E9' => 'é',
9558
        '%EA' => 'ê',
9559
        '%EB' => 'ë',
9560
        '%EC' => 'ì',
9561
        '%ED' => 'í',
9562
        '%EE' => 'î',
9563
        '%EF' => 'ï',
9564
        '%F0' => 'ð',
9565
        '%F1' => 'ñ',
9566
        '%F2' => 'ò',
9567
        '%F3' => 'ó',
9568
        '%F4' => 'ô',
9569
        '%F5' => 'õ',
9570
        '%F6' => 'ö',
9571
        '%F7' => '÷',
9572
        '%F8' => 'ø',
9573
        '%F9' => 'ù',
9574
        '%FA' => 'ú',
9575
        '%FB' => 'û',
9576
        '%FC' => 'ü',
9577
        '%FD' => 'ý',
9578
        '%FE' => 'þ',
9579
        '%FF' => 'ÿ',
9580
    ];
9581
  }
9582
9583
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * <p>
   * The input is first passed through the "win1252_to_utf8" replacement table and is then converted
   * byte-wise: a 2-byte sequence whose code point is below 256 becomes the matching single byte,
   * every other multi-byte sequence (and a truncated 2-byte sequence at the end of the input)
   * becomes "?".
   * </p>
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true and the decoded result is not shorter (measured with
   *                              self::strlen()) than the string before decoding, the string before
   *                              decoding is returned instead of the decoded one.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if ('' === $str) {
      return '';
    }

    // NOTE: despite their names, both caches hold the keys / values of self::$WIN1252_TO_UTF8,
    //       so the replacement below runs in the direction "key -> value" of that table.
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison (only used when $keepUtf8Chars is true)
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    // lazy-load the lookup tables used below (presumably byte => int and int => byte)
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position; the string is rewritten in place,
    // which is safe because $j never runs ahead of $i
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // dispatch on the high nibble of the current byte
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if (!isset($str[$i + 1])) {
            // truncated sequence at the very end of the input: there is no continuation byte
            // to read, so emit the placeholder instead of reading past the string
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one extra byte, then share the 3-byte handling
          ++$i;
          // intentional fall-through
        case "\xE0":
          // 3-byte (or 4-byte) sequence: not representable in a single byte
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // every other byte is copied as is
          $str[$j] = $str[$i];
      }
    }

    // cut off the leftover bytes behind the write position
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
9664
9665
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * After the native conversion, the result is passed through the "win1252_to_utf8" replacement
   * table, but only if it contains a "\xC2" byte at all.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the native conversion failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $str = (string)$encoded;

    // without a "\xC2" byte there is nothing for the replacement table to do
    if (\strpos($str, "\xC2") === false) {
      return $str;
    }

    // search / replace lists are built once per request and then re-used
    static $search = null;
    static $replace = null;

    if ($search === null) {
      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $search = \array_keys(self::$WIN1252_TO_UTF8);
      $replace = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($search, $replace, $str);
  }
9705
9706
  /**
   * Fix utf8-win1252 chars; kept only as an alias that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    return self::fix_simple_utf8($str);
  }
9719
9720
  /**
   * Returns the table of all utf8 whitespace characters (self::$WHITESPACE_TABLE).
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[] <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    return self::$WHITESPACE_TABLE;
  }
9736
9737
  /**
   * Limit the number of words in a string.
   *
   * <p>
   * A "word" is a run of non-whitespace characters. If the string holds no more than $limit words
   * (or the pattern does not match at all) the string is returned untouched; otherwise the first
   * $limit words are returned, right-trimmed, followed by $strAddOn.
   * </p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // optional leading whitespace, then between 1 and $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    $head = $matches[0] ?? null;

    // no match, or the match already covers the whole string: nothing to cut off
    if ($head === null || self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
9768
9769
  /**
   * Wraps a string to a given number of characters
   *
   * <p>
   * The string is split into characters via self::split() and mirrored into an ASCII "mask"
   * ("#" for an existing break, " " for a space, "?" for anything else). The native wordwrap()
   * runs on that mask, and the break positions it produces are then replayed onto the real
   * characters.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column;
   *                an empty string if $str or $break is empty.
   *                </p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    // 1) build the list of real characters and the one-byte-per-character mask
    $glyphs = [];
    $mask = '';
    foreach (\explode($break, $str) as $index => $segment) {
      if ($index !== 0) {
        // a break that was already part of the input
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= (' ' === $glyph) ? ' ' : '?';
      }
    }

    // 2) let the native function decide where the breaks go
    $mask = \wordwrap($mask, $width, '#', $cut);

    // 3) replay every "#" of the wrapped mask onto the real characters
    $result = '';
    $next = 0;         // index of the next not yet emitted entry of $glyphs
    $previousHash = -1; // mask position of the "#" handled in the previous round
    $hash = -1;

    while (false !== $hash = self::strpos($mask, '#', $hash + 1)) {
      // emit the characters that sit between the previous "#" and this one
      for ($pending = $hash - $previousHash - 1; $pending > 0; --$pending) {
        $result .= $glyphs[$next++];
      }
      $previousHash = $hash;

      // the "#" replaced an original break or a space: swallow that character;
      // otherwise the "#" was inserted and no character is consumed
      if ($break === $glyphs[$next] || ' ' === $glyphs[$next]) {
        ++$next;
      }

      $result .= $break;
    }

    // whatever is left belongs to the last line
    return $result . \implode('', \array_slice($glyphs, $next));
  }
9833
9834
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * <p>
   * The input is split at "\r\n", "\r" and "\n"; every line is wrapped on its own with the
   * UTF-8 aware "UTF8::wordwrap()" and is terminated with "\n" (so the result always ends with "\n").
   * </p>
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width that is passed to "UTF8::wordwrap()".</p>
   *
   * @return string
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      // the split failed: treat the whole input as one single line
      $lines = [$str];
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // an unqualified wordwrap() would resolve to the native, byte-counting function here,
      // so the multi-byte safe implementation of this class is called explicitly
      $wrapped .= self::wordwrap($line, $limit);
      $wrapped .= "\n";
    }

    return $wrapped;
  }
9854
9855
  /**
   * Returns the array of Unicode White Space characters (self::$WHITESPACE).
   *
   * @return string[] <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    return self::$WHITESPACE;
  }
9864
9865
}
9866