Completed
Push — master ( 23df09...224446 )
by Lars
02:34
created

UTF8::split()   F

Complexity

Conditions 24
Paths 49

Size

Total Lines 108

Duplication

Lines 12
Ratio 11.11 %

Code Coverage

Tests 32
CRAP Score 37.8888

Importance

Changes 0
Metric Value
dl 12
loc 108
ccs 32
cts 45
cp 0.711
rs 3.3333
c 0
b 0
f 0
cc 24
nc 49
nop 3
crap 37.8888

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      'ï»¿'              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 16
  public function __construct()
209
  {
210 16
    self::checkForSupport();
211 16
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219
   * @return string <p>Single Multi-Byte character.</p>
220
   */
221 2
  public static function access(string $str, int $pos): string
222
  {
223 2
    if (!isset($str[0])) {
224 1
      return '';
225
    }
226
227 2
    if ($pos < 0) {
228 1
      return '';
229
    }
230
231 2
    return (string)self::substr($str, $pos, 1);
232
  }
233
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string <p>The output string that contains BOM.</p>
242
   */
243 1
  public static function add_bom_to_string(string $str): string
244
  {
245 1
    if (self::string_has_bom($str) === false) {
246 1
      $str = self::bom() . $str;
247
    }
248
249 1
    return $str;
250
  }
251
252
  /**
253
   * Changes all keys in an array.
254
   *
255
   * @param array $array <p>The array to work on</p>
256
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
257
   *                     or <strong>CASE_LOWER</strong> (default)</p>
258
   *
259
   * @return string[] <p>An array with its keys lower or uppercased.</p>
260
   */
261 1
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
262
  {
263
    if (
264 1
        $case !== CASE_LOWER
265
        &&
266 1
        $case !== CASE_UPPER
267
    ) {
268
      $case = CASE_LOWER;
269
    }
270
271 1
    $return = [];
272 1
    foreach ($array as $key => $value) {
273 1
      if ($case === CASE_LOWER) {
274 1
        $key = self::strtolower($key);
275
      } else {
276 1
        $key = self::strtoupper($key);
277
      }
278
279 1
      $return[$key] = $value;
280
    }
281
282 1
    return $return;
283
  }
284
285
  /**
286
   * Returns the substring between $start and $end, if found, or an empty
287
   * string. An optional offset may be supplied from which to begin the
288
   * search for the start string.
289
   *
290
   * @param string $str
291
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
292
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
293
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
294
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
295
   *
296
   * @return string
297
   */
298
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
299
  {
300
    $posStart = self::strpos($str, $start, $offset, $encoding);
301
    if ($posStart === false) {
302
      return '';
303
    }
304
305
    $substrIndex = $posStart + self::strlen($start, $encoding);
306
    $posEnd = self::strpos($str, $end, $substrIndex, $encoding);
307
    if (
308
        $posEnd === false
309
        ||
310
        $posEnd === $substrIndex
311
    ) {
312
      return '';
313
    }
314
315
    return self::substr($str, $substrIndex, $posEnd - $substrIndex, $encoding);
316
  }
317
318
  /**
319
   * Convert binary into a string.
320
   *
321
   * @param mixed $bin 1|0
322
   *
323
   * @return string
324
   */
325 1
  public static function binary_to_str($bin): string
326
  {
327 1
    if (!isset($bin[0])) {
328
      return '';
329
    }
330
331 1
    $convert = \base_convert($bin, 2, 16);
332 1
    if ($convert === '0') {
333 1
      return '';
334
    }
335
336 1
    return \pack('H*', $convert);
337
  }
338
339
  /**
340
   * Returns the UTF-8 Byte Order Mark Character.
341
   *
342
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
343
   *
344
   * @return string UTF-8 Byte Order Mark
345
   */
346 2
  public static function bom(): string
347
  {
348 2
    return "\xef\xbb\xbf";
349
  }
350
351
  /**
352
   * @alias of UTF8::chr_map()
353
   *
354
   * @see   UTF8::chr_map()
355
   *
356
   * @param string|array $callback
357
   * @param string       $str
358
   *
359
   * @return string[]
360
   */
361 1
  public static function callback($callback, string $str): array
362
  {
363 1
    return self::chr_map($callback, $str);
364
  }
365
366
  /**
367
   * Returns the character at $index, with indexes starting at 0.
368
   *
369
   * @param string $str
370
   * @param int    $index <p>Position of the character.</p>
371
   *
372
   * @return string <p>The character at $index.</p>
373
   */
374
  public static function char_at(string $str, int $index): string
375
  {
376
    return self::substr($str, $index, 1);
377
  }
378
379
  /**
380
   * Returns an array consisting of the characters in the string.
381
   *
382
   * @param string $str <p>The input string.</p>
383
   *
384
   * @return string[] <p>An array of chars.</p>
385
   */
386
  public static function chars(string $str): array
387
  {
388
    return self::str_split($str, 1);
389
  }
390
391
  /**
392
   * This method will auto-detect your server environment for UTF-8 support.
393
   *
394
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
395
   */
396 19
  public static function checkForSupport()
397
  {
398 19
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
399
400 1
      self::$SUPPORT['already_checked_via_portable_utf8'] = true;
401
402
      // http://php.net/manual/en/book.mbstring.php
403 1
      self::$SUPPORT['mbstring'] = self::mbstring_loaded();
404 1
      self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
405
406
      // http://php.net/manual/en/book.iconv.php
407 1
      self::$SUPPORT['iconv'] = self::iconv_loaded();
408
409
      // http://php.net/manual/en/book.intl.php
410 1
      self::$SUPPORT['intl'] = self::intl_loaded();
411 1
      self::$SUPPORT['intl__transliterator_list_ids'] = [];
412
      if (
413 1
          self::$SUPPORT['intl'] === true
414
          &&
415 1
          \function_exists('transliterator_list_ids') === true
416
      ) {
417 1
        self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
418
      }
419
420
      // http://php.net/manual/en/class.intlchar.php
421 1
      self::$SUPPORT['intlChar'] = self::intlChar_loaded();
422
423
      // http://php.net/manual/en/book.pcre.php
424 1
      self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
425
    }
426 19
  }
427
428
  /**
429
   * Generates a UTF-8 encoded character from the given code point.
430
   *
431
   * INFO: opposite to UTF8::ord()
432
   *
433
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
434
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
435
   *
436
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
437
   */
438 10
  public static function chr($code_point, string $encoding = 'UTF-8')
439
  {
440
    // init
441 10
    static $CHAR_CACHE = [];
442
443 10
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
444
      self::checkForSupport();
445
    }
446
447 10
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
448 2
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
449
    }
450
451 View Code Duplication
    if (
452 10
        $encoding !== 'UTF-8'
453
        &&
454 10
        $encoding !== 'ISO-8859-1'
455
        &&
456 10
        $encoding !== 'WINDOWS-1252'
457
        &&
458 10
        self::$SUPPORT['mbstring'] === false
459
    ) {
460
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
461
    }
462
463 10
    $cacheKey = $code_point . $encoding;
464 10
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
465 8
      return $CHAR_CACHE[$cacheKey];
466
    }
467
468 9
    if ($code_point <= 127) { // use "simple"-char only until "\x80"
469
470 7
      if (self::$CHR === null) {
471
        self::$CHR = self::getData('chr');
472
      }
473
474 7
      $chr = self::$CHR[$code_point];
475
476 7
      if ($encoding !== 'UTF-8') {
477 1
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
478
      }
479
480 7
      return $CHAR_CACHE[$cacheKey] = $chr;
481
    }
482
483 7
    if (self::$SUPPORT['intlChar'] === true) {
484 7
      $chr = \IntlChar::chr($code_point);
485
486 7
      if ($encoding !== 'UTF-8') {
487
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
488
      }
489
490 7
      return $CHAR_CACHE[$cacheKey] = $chr;
491
    }
492
493
    if (self::$CHR === null) {
494
      self::$CHR = self::getData('chr');
495
    }
496
497
    if ($code_point <= 0x7F) {
498
      $chr = self::$CHR[$code_point];
499
    } elseif ($code_point <= 0x7FF) {
500
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
501
             self::$CHR[($code_point & 0x3F) + 0x80];
502
    } elseif ($code_point <= 0xFFFF) {
503
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
504
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
505
             self::$CHR[($code_point & 0x3F) + 0x80];
506
    } else {
507
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
508
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
509
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
510
             self::$CHR[($code_point & 0x3F) + 0x80];
511
    }
512
513
    if ($encoding !== 'UTF-8') {
514
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
515
    }
516
517
    return $CHAR_CACHE[$cacheKey] = $chr;
518
  }
519
520
  /**
521
   * Applies callback to all characters of a string.
522
   *
523
   * @param string|array $callback <p>The callback function.</p>
524
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
525
   *
526
   * @return string[] <p>The outcome of callback.</p>
527
   */
528 1
  public static function chr_map($callback, string $str): array
529
  {
530 1
    $chars = self::split($str);
531
532 1
    return \array_map($callback, $chars);
533
  }
534
535
  /**
536
   * Generates an array of byte length of each character of a Unicode string.
537
   *
538
   * 1 byte => U+0000  - U+007F
539
   * 2 byte => U+0080  - U+07FF
540
   * 3 byte => U+0800  - U+FFFF
541
   * 4 byte => U+10000 - U+10FFFF
542
   *
543
   * @param string $str <p>The original unicode string.</p>
544
   *
545
   * @return int[] <p>An array of byte lengths of each character.</p>
546
   */
547 2
  public static function chr_size_list(string $str): array
548
  {
549 2
    if (!isset($str[0])) {
550 2
      return [];
551
    }
552
553 2
    $strSplit = self::split($str);
554
555 2
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
556
      return \array_map(
557
          function ($data) {
558
            return UTF8::strlen($data, 'CP850'); // 8-BIT
559
          },
560
          $strSplit
561
      );
562
    }
563
564 2
    return \array_map('\strlen', $strSplit);
565
  }
566
567
  /**
568
   * Get a decimal code representation of a specific character.
569
   *
570
   * @param string $char <p>The input character.</p>
571
   *
572
   * @return int
573
   */
574 2
  public static function chr_to_decimal(string $char): int
575
  {
576 2
    $code = self::ord($char[0]);
577 2
    $bytes = 1;
578
579 2
    if (!($code & 0x80)) {
580
      // 0xxxxxxx
581 2
      return $code;
582
    }
583
584 2
    if (($code & 0xe0) === 0xc0) {
585
      // 110xxxxx
586 2
      $bytes = 2;
587 2
      $code &= ~0xc0;
588 2
    } elseif (($code & 0xf0) === 0xe0) {
589
      // 1110xxxx
590 2
      $bytes = 3;
591 2
      $code &= ~0xe0;
592 1
    } elseif (($code & 0xf8) === 0xf0) {
593
      // 11110xxx
594 1
      $bytes = 4;
595 1
      $code &= ~0xf0;
596
    }
597
598 2
    for ($i = 2; $i <= $bytes; $i++) {
599
      // 10xxxxxx
600 2
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
601
    }
602
603 2
    return $code;
604
  }
605
606
  /**
607
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
608
   *
609
   * @param string $char <p>The input character</p>
610
   * @param string $pfix [optional]
611
   *
612
   * @return string <p>The code point encoded as U+xxxx</p>
613
   */
614 1
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
615
  {
616 1
    if (!isset($char[0])) {
617 1
      return '';
618
    }
619
620 1
    if ($char === '&#0;') {
621 1
      $char = '';
622
    }
623
624 1
    return self::int_to_hex(self::ord($char), $pfix);
625
  }
626
627
  /**
628
   * alias for "UTF8::chr_to_decimal()"
629
   *
630
   * @see UTF8::chr_to_decimal()
631
   *
632
   * @param string $chr
633
   *
634
   * @return int
635
   */
636 1
  public static function chr_to_int(string $chr): int
637
  {
638 1
    return self::chr_to_decimal($chr);
639
  }
640
641
  /**
642
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
643
   *
644
   * @param string $body     <p>The original string to be split.</p>
645
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
646
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
647
   *
648
   * @return string <p>The chunked string</p>
649
   */
650 1
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
651
  {
652 1
    return \implode($end, self::split($body, $chunklen));
653
  }
654
655
  /**
656
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
657
   *
658
   * @param string $str                           <p>The string to be sanitized.</p>
659
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
660
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
661
   *                                              whitespace.</p>
662
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
663
   *                                              e.g.: "…"
664
   *                                              => "..."</p>
665
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
666
   *                                              combination with
667
   *                                              $normalize_whitespace</p>
668
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
669
   *                                              mark e.g.: "�"</p>
670
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
671
   *                                              characters e.g.: "\0"</p>
672
   *
673
   * @return string <p>Clean UTF-8 encoded string.</p>
674
   */
675 64
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
676
  {
677
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
678
    // caused connection reset problem on larger strings
679
680 64
    $regx = '/
681
      (
682
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
683
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
684
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
685
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
686
        ){1,100}                      # ...one or more times
687
      )
688
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
689
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
690
    /x';
691 64
    $str = (string)\preg_replace($regx, '$1', $str);
692
693 64
    if ($replace_diamond_question_mark === true) {
694 36
      $str = self::replace_diamond_question_mark($str, '');
695
    }
696
697 64
    if ($remove_invisible_characters === true) {
698 64
      $str = self::remove_invisible_characters($str);
699
    }
700
701 64
    if ($normalize_whitespace === true) {
702 38
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
703
    }
704
705 64
    if ($normalize_msword === true) {
706 15
      $str = self::normalize_msword($str);
707
    }
708
709 64
    if ($remove_bom === true) {
710 37
      $str = self::remove_bom($str);
711
    }
712
713 64
    return $str;
714
  }
715
716
  /**
717
   * Clean up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
718
   *
719
   * @param string $str <p>The input string.</p>
720
   *
721
   * @return string
722
   */
723 24
  public static function cleanup(string $str): string
724
  {
725 24
    if (!isset($str[0])) {
726 2
      return '';
727
    }
728
729
    // fixed ISO <-> UTF-8 Errors
730 24
    $str = self::fix_simple_utf8($str);
731
732
    // remove all non-UTF-8 symbols
733
    // && remove diamond question mark (�)
734
    // && remove invisible characters (e.g. "\0")
735
    // && remove BOM
736
    // && normalize whitespace chars (but keep non-breaking-spaces)
737 24
    $str = self::clean(
738 24
        $str,
739 24
        true,
740 24
        true,
741 24
        false,
742 24
        true,
743 24
        true,
744 24
        true
745
    );
746
747 24
    return $str;
748
  }
749
750
  /**
751
   * Accepts a string or an array of strings and returns an array of Unicode code points.
752
   *
753
   * INFO: opposite to UTF8::string()
754
   *
755
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
756
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
757
   *                                    default, code points will be returned as integers.</p>
758
   *
759
   * @return int[] <p>The array of code points.</p>
760
   */
761 7
  public static function codepoints($arg, bool $u_style = false): array
762
  {
763 7
    if (\is_string($arg) === true) {
764 7
      $arg = self::split($arg);
765
    }
766
767 7
    $arg = \array_map(
768
        [
769 7
            self::class,
770
            'ord',
771
        ],
772 7
        $arg
773
    );
774
775 7
    if ($u_style) {
776 1
      $arg = \array_map(
777
          [
778 1
              self::class,
779
              'int_to_hex',
780
          ],
781 1
          $arg
782
      );
783
    }
784
785 7
    return $arg;
786
  }
787
788
  /**
789
   * Trims the string and replaces consecutive whitespace characters with a
790
   * single space. This includes tabs and newline characters, as well as
791
   * multibyte whitespace such as the thin space and ideographic space.
792
   *
793
   * @param string $str <p>The input string.</p>
794
   *
795
   * @return string <p>String with a trimmed $str and condensed whitespace.</p>
796
   */
797
  public static function collapse_whitespace(string $str): string
798
  {
799
    return self::trim(
800
        self::regexReplace($str, '[[:space:]]+', ' ')
801
    );
802
  }
803
804
  /**
805
   * Returns count of characters used in a string.
806
   *
807
   * @param string $str       <p>The input string.</p>
808
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
809
   *
810
   * @return int[] <p>An associative array of Character as keys and
811
   *               their count as values.</p>
812
   */
813 9
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
814
  {
815 9
    return \array_count_values(self::split($str, 1, $cleanUtf8));
816
  }
817
818
  /**
819
   * Returns a lowercase and trimmed string separated by dashes. Dashes are
820
   * inserted before uppercase characters (with the exception of the first
821
   * character of the string), and in place of spaces as well as underscores.
822
   *
823
   * @param string $str <p>The input string.</p>
824
   *
825
   * @return string
826
   */
827
  public static function dasherize(string $str): string
828
  {
829
    return self::delimit($str, '-');
830
  }
831
832
  /**
833
   * Converts an int value into a UTF-8 character.
834
   *
835
   * @param mixed $int
836
   *
837
   * @return string
838
   */
839 5
  public static function decimal_to_chr($int): string
840
  {
841 5
    return self::html_entity_decode('&#' . $int . ';', ENT_QUOTES | ENT_HTML5);
842
  }
843
844
  /**
845
   * Returns a lowercase and trimmed string separated by the given delimiter.
846
   * Delimiters are inserted before uppercase characters (with the exception
847
   * of the first character of the string), and in place of spaces, dashes,
848
   * and underscores. Alpha delimiters are not converted to lowercase.
849
   *
850
   * @param string $str       <p>The input string.</p>
851
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
852
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
853
   *
854
   * @return string
855
   */
856
  public static function delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
857
  {
858
    $str = self::trim($str);
859
860
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);
861
862
    $str = self::strtolower($str, $encoding);
863
864
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
865
  }
866
867
  /**
868
   * Encode a string with a new charset-encoding.
869
   *
870
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
871
   *        so you can also call this function on a UTF-8 string without messing it up.
872
   *
873
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
874
   * @param string $str      <p>The input string</p>
875
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
876
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
877
   *
878
   * @return string
879
   */
880 14
  public static function encode(string $encoding, string $str, bool $force = true): string
881
  {
882 14
    if (!isset($str[0], $encoding[0])) {
883 6
      return $str;
884
    }
885
886 14
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
887 2
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
888
    }
889
890 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
891
      self::checkForSupport();
892
    }
893
894 14
    $encodingDetected = self::str_detect_encoding($str);
895
896
    if (
897 14
        $force === true
898
        ||
899
        (
900 6
            $encodingDetected !== false
901
            &&
902 14
            $encodingDetected !== $encoding
903
        )
904
    ) {
905
906 View Code Duplication
      if (
907 14
          $encoding === 'UTF-8'
908
          &&
909
          (
910 14
              $force === true
911 4
              || $encodingDetected === 'UTF-8'
912 4
              || $encodingDetected === 'WINDOWS-1252'
913 14
              || $encodingDetected === 'ISO-8859-1'
914
          )
915
      ) {
916 13
        return self::to_utf8($str);
917
      }
918
919 View Code Duplication
      if (
920 4
          $encoding === 'ISO-8859-1'
921
          &&
922
          (
923 2
              $force === true
924 1
              || $encodingDetected === 'ISO-8859-1'
925 1
              || $encodingDetected === 'WINDOWS-1252'
926 4
              || $encodingDetected === 'UTF-8'
927
          )
928
      ) {
929 2
        return self::to_iso8859($str);
930
      }
931
932 View Code Duplication
      if (
933 3
          $encoding !== 'UTF-8'
934
          &&
935 3
          $encoding !== 'ISO-8859-1'
936
          &&
937 3
          $encoding !== 'WINDOWS-1252'
938
          &&
939 3
          self::$SUPPORT['mbstring'] === false
940
      ) {
941
        \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
942
      }
943
944 3
      $strEncoded = \mb_convert_encoding(
945 3
          $str,
946 3
          $encoding,
947 3
          $encodingDetected
948
      );
949
950 3
      if ($strEncoded) {
951 3
        return $strEncoded;
952
      }
953
    }
954
955 3
    return $str;
956
  }
957
958
  /**
959
   * Create an extract from a sentence; if the search string is found, the extract tries to center it in the output.
960
   *
961
   * @param string   $str                    <p>The input string.</p>
962
   * @param string   $search                 <p>The searched string.</p>
963
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
964
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
965
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
966
   *
967
   * @return string
968
   */
969
  public static function extractText(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
970
  {
971
    // init
972
    $text = $str;
973
974
    if (empty($text)) {
975
      return '';
976
    }
977
978
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
979
980
    if ($length === null) {
981
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
982
    }
983
984
    if (empty($search)) {
985
986
      $stringLength = self::strlen($text, $encoding);
987
988
      if ($length > 0) {
989
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
990
      } else {
991
        $end = 0;
992
      }
993
994
      $pos = \min(
995
          self::strpos($text, ' ', $end, $encoding),
996
          self::strpos($text, '.', $end, $encoding)
997
      );
998
999
      if ($pos) {
1000
        return \rtrim(
1001
                   self::substr($text, 0, $pos, $encoding),
1002
                   $trimChars
1003
               ) . $replacerForSkippedText;
1004
      }
1005
1006
      return $text;
1007
    }
1008
1009
    $wordPos = self::stripos(
1010
        $text,
1011
        $search,
1012
        0,
1013
        $encoding
1014
    );
1015
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);
1016
1017
    if ($halfSide > 0) {
1018
1019
      $halfText = self::substr($text, 0, $halfSide, $encoding);
1020
      $pos_start = \max(
1021
          self::strrpos($halfText, ' ', 0, $encoding),
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1019 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1022
          self::strrpos($halfText, '.', 0, $encoding)
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1019 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1023
      );
1024
1025
      if (!$pos_start) {
1026
        $pos_start = 0;
1027
      }
1028
1029
    } else {
1030
      $pos_start = 0;
1031
    }
1032
1033
    if ($wordPos && $halfSide > 0) {
1034
      $l = $pos_start + $length - 1;
1035
      $realLength = self::strlen($text, $encoding);
1036
1037
      if ($l > $realLength) {
1038
        $l = $realLength;
1039
      }
1040
1041
      $pos_end = \min(
1042
                     self::strpos($text, ' ', $l, $encoding),
1043
                     self::strpos($text, '.', $l, $encoding)
1044
                 ) - $pos_start;
1045
1046
      if (!$pos_end || $pos_end <= 0) {
1047
        $extract = $replacerForSkippedText . \ltrim(
1048
                self::substr(
1049
                    $text,
1050
                    $pos_start,
1051
                    self::strlen($text),
1052
                    $encoding
1053
                ),
1054
                $trimChars
1055
            );
1056 View Code Duplication
      } else {
1057
        $extract = $replacerForSkippedText . \trim(
1058
                self::substr(
1059
                    $text,
1060
                    $pos_start,
1061
                    $pos_end,
1062
                    $encoding
1063
                ),
1064
                $trimChars
1065
            ) . $replacerForSkippedText;
1066
      }
1067
1068
    } else {
1069
1070
      $l = $length - 1;
1071
      $trueLength = self::strlen($text, $encoding);
1072
1073
      if ($l > $trueLength) {
1074
        $l = $trueLength;
1075
      }
1076
1077
      $pos_end = \min(
1078
          self::strpos($text, ' ', $l, $encoding),
1079
          self::strpos($text, '.', $l, $encoding)
1080
      );
1081
1082 View Code Duplication
      if ($pos_end) {
1083
        $extract = \rtrim(
1084
                       self::substr($text, 0, $pos_end, $encoding),
1085
                       $trimChars
1086
                   ) . $replacerForSkippedText;
1087
      } else {
1088
        $extract = $text;
1089
      }
1090
    }
1091
1092
    return $extract;
1093
  }
1094
1095
  /**
1096
   * Reads entire file into a string.
1097
   *
1098
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1099
   *
1100
   * @link http://php.net/manual/en/function.file-get-contents.php
1101
   *
1102
   * @param string        $filename         <p>
1103
   *                                        Name of the file to read.
1104
   *                                        </p>
1105
   * @param bool          $use_include_path [optional] <p>
1106
   *                                        Prior to PHP 5, this parameter is called
1107
   *                                        use_include_path and is a bool.
1108
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1109
   *                                        to trigger include path
1110
   *                                        search.
1111
   *                                        </p>
1112
   * @param resource|null $context          [optional] <p>
1113
   *                                        A valid context resource created with
1114
   *                                        stream_context_create. If you don't need to use a
1115
   *                                        custom context, you can skip this parameter by &null;.
1116
   *                                        </p>
1117
   * @param int|null      $offset           [optional] <p>
1118
   *                                        The offset where the reading starts.
1119
   *                                        </p>
1120
   * @param int|null      $maxLength        [optional] <p>
1121
   *                                        Maximum length of data read. The default is to read until end
1122
   *                                        of file is reached.
1123
   *                                        </p>
1124
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1125
   *
1126
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1127
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1128
   *
1129
   * @return string|false <p>The function returns the read data or false on failure.</p>
1130
   */
1131 6
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
    // strips / encodes characters in the file name — confirm it is still wanted.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() signals a failed filter with false. Passing that on to
    // \file_get_contents() would raise a TypeError under strict_types instead
    // of producing the documented "false on failure".
    if ($filename === false) {
      return false;
    }

    // Only build a stream context for the timeout when the caller did not
    // bring one along.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed when it is a real integer.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // only for non binary, but also for UTF-16 or UTF-32
    if (
        self::is_binary($data, true) !== true
        ||
        self::is_utf16($data) !== false
        ||
        self::is_utf32($data) !== false
    ) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1178
1179
  /**
1180
   * Returns whether or not a character exists at an index. Offsets may be
1181
   * negative to count from the last character in the string. Implements
1182
   * part of the ArrayAccess interface.
1183
   *
1184
   * @param int    $offset   <p>The index to check.</p>
1185
   * @param string $str      <p>The input string.</p>
1186
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
1187
   *
1188
   *
1189
   * @return boolean <p>Whether or not the index exists.</p>
1190
   */
1191
  public static function offset_exists($offset, string $str, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);
    $index = (int)$offset;

    // Negative indexes count from the end, so -1 exists as soon as the
    // string has at least one character.
    if ($index < 0) {
      return ($charCount >= \abs($index));
    }

    // Non-negative indexes are zero-based.
    return ($charCount > $index);
  }
1203
1204
  /**
1205
   * Checks if a file starts with BOM (Byte Order Mark) character.
1206
   *
1207
   * @param string $file_path <p>Path to a valid file.</p>
1208
   *
1209
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1210
   */
1211 1
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // \file_get_contents() returns false when the file cannot be read. Passing
    // that on to string_has_bom(), which is called with a string here, would
    // surface as an unrelated TypeError under strict_types, so fail with a
    // clear message instead.
    if ($content === false) {
      throw new \RuntimeException('file_get_contents() returned false for: ' . $file_path);
    }

    return self::string_has_bom($content);
  }
1215
1216
  /**
1217
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1218
   *
1219
   * @param mixed  $var
1220
   * @param int    $normalization_form
1221
   * @param string $leading_combining
1222
   *
1223
   * @return mixed
1224
   */
1225 9
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string is handed back untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (\strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder, so that the isset() check below passes.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized form when there is one, otherwise fall back to
      // re-encoding the input.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1279
1280
  /**
1281
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1282
   *
1283
   * Gets a specific external variable by name and optionally filters it
1284
   *
1285
   * @link  http://php.net/manual/en/function.filter-input.php
1286
   *
1287
   * @param int    $type          <p>
1288
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1289
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1290
   *                              <b>INPUT_ENV</b>.
1291
   *                              </p>
1292
   * @param string $variable_name <p>
1293
   *                              Name of a variable to get.
1294
   *                              </p>
1295
   * @param int    $filter        [optional] <p>
1296
   *                              The ID of the filter to apply. The
1297
   *                              manual page lists the available filters.
1298
   *                              </p>
1299
   * @param mixed  $options       [optional] <p>
1300
   *                              Associative array of options or bitwise disjunction of flags. If filter
1301
   *                              accepts options, flags can be provided in "flags" field of array.
1302
   *                              </p>
1303
   *
1304
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1305
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1306
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1307
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1308
   * @since 5.2.0
1309
   */
1310 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a fourth
    // argument; otherwise the native function is called without it.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1320
1321
  /**
1322
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1323
   *
1324
   * Gets external variables and optionally filters them
1325
   *
1326
   * @link  http://php.net/manual/en/function.filter-input-array.php
1327
   *
1328
   * @param int   $type       <p>
1329
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1330
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1331
   *                          <b>INPUT_ENV</b>.
1332
   *                          </p>
1333
   * @param mixed $definition [optional] <p>
1334
   *                          An array defining the arguments. A valid key is a string
1335
   *                          containing a variable name and a valid value is either a filter type, or an array
1336
   *                          optionally specifying the filter, flags and options. If the value is an
1337
   *                          array, valid keys are filter which specifies the
1338
   *                          filter type,
1339
   *                          flags which specifies any flags that apply to the
1340
   *                          filter, and options which specifies any options that
1341
   *                          apply to the filter. See the example below for a better understanding.
1342
   *                          </p>
1343
   *                          <p>
1344
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1345
   *                          input array are filtered by this filter.
1346
   *                          </p>
1347
   * @param bool  $add_empty  [optional] <p>
1348
   *                          Add missing keys as <b>NULL</b> to the return value.
1349
   *                          </p>
1350
   *
1351
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1352
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1353
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1354
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1355
   * fails.
1356
   * @since 5.2.0
1357
   */
1358 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native function is called with $type only;
    // otherwise $definition and $add_empty are forwarded as well.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1368
1369
  /**
1370
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1371
   *
1372
   * Filters a variable with a specified filter
1373
   *
1374
   * @link  http://php.net/manual/en/function.filter-var.php
1375
   *
1376
   * @param mixed $variable <p>
1377
   *                        Value to filter.
1378
   *                        </p>
1379
   * @param int   $filter   [optional] <p>
1380
   *                        The ID of the filter to apply. The
1381
   *                        manual page lists the available filters.
1382
   *                        </p>
1383
   * @param mixed $options  [optional] <p>
1384
   *                        Associative array of options or bitwise disjunction of flags. If filter
1385
   *                        accepts options, flags can be provided in "flags" field of array. For
1386
   *                        the "callback" filter, callable type should be passed. The
1387
   *                        callback must accept one argument, the value to be filtered, and return
1388
   *                        the value after filtering/sanitizing it.
1389
   *                        </p>
1390
   *                        <p>
1391
   *                        <code>
1392
   *                        // for filters that accept options, use this format
1393
   *                        $options = array(
1394
   *                        'options' => array(
1395
   *                        'default' => 3, // value to return if the filter fails
1396
   *                        // other options here
1397
   *                        'min_range' => 0
1398
   *                        ),
1399
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1400
   *                        );
1401
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1402
   *                        // for filter that only accept flags, you can pass them directly
1403
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1404
   *                        // for filter that only accept flags, you can also pass as an array
1405
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1406
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1407
   *                        // callback validate filter
1408
   *                        function foo($value)
1409
   *                        {
1410
   *                        // Expected format: Surname, GivenNames
1411
   *                        if (strpos($value, ", ") === false) return false;
1412
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1413
   *                        $empty = (empty($surname) || empty($givennames));
1414
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1415
   *                        if ($empty || $notstrings) {
1416
   *                        return false;
1417
   *                        } else {
1418
   *                        return $value;
1419
   *                        }
1420
   *                        }
1421
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1422
   *                        </code>
1423
   *                        </p>
1424
   *
1425
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1426
   * @since 5.2.0
1427
   */
1428 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a third
    // argument; otherwise the native function is called without it.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1438
1439
  /**
1440
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1441
   *
1442
   * Gets multiple variables and optionally filters them
1443
   *
1444
   * @link  http://php.net/manual/en/function.filter-var-array.php
1445
   *
1446
   * @param array $data       <p>
1447
   *                          An array with string keys containing the data to filter.
1448
   *                          </p>
1449
   * @param mixed $definition [optional] <p>
1450
   *                          An array defining the arguments. A valid key is a string
1451
   *                          containing a variable name and a valid value is either a
1452
   *                          filter type, or an
1453
   *                          array optionally specifying the filter, flags and options.
1454
   *                          If the value is an array, valid keys are filter
1455
   *                          which specifies the filter type,
1456
   *                          flags which specifies any flags that apply to the
1457
   *                          filter, and options which specifies any options that
1458
   *                          apply to the filter. See the example below for a better understanding.
1459
   *                          </p>
1460
   *                          <p>
1461
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1462
   *                          input array are filtered by this filter.
1463
   *                          </p>
1464
   * @param bool  $add_empty  [optional] <p>
1465
   *                          Add missing keys as <b>NULL</b> to the return value.
1466
   *                          </p>
1467
   *
1468
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1469
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1470
   * the variable is not set.
1471
   * @since 5.2.0
1472
   */
1473 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native function is called with $data only;
    // otherwise $definition and $add_empty are forwarded as well.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1483
1484
  /**
1485
   * Returns the first $n characters of the string.
1486
   *
1487
   * @param string $str      <p>The input string.</p>
1488
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1489
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
1490
   *
1491
   * @return string
1492
   */
1493
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Nothing to extract from an empty string or for a non-positive count.
    if ($str === '' || $n <= 0) {
      return '';
    }

    // self::substr() can hand back false; the cast keeps the declared
    // "string" return type intact instead of raising a TypeError.
    return (string)self::substr($str, 0, $n, $encoding);
  }
1501
1502
  /**
1503
   * Check if the number of unicode characters is not more than the specified integer.
1504
   *
1505
   * @param string $str      The original string to be checked.
1506
   * @param int    $box_size The size in number of chars to be checked against string.
1507
   *
1508
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1509
   */
1510 1
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length as reported by self::strlen() with its default encoding.
    $charCount = self::strlen($str);

    return ($charCount <= $box_size);
  }
1514
1515
  /**
1516
   * Try to fix simple broken UTF-8 strings.
1517
   *
1518
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1519
   *
1520
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1521
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1522
   * See: http://en.wikipedia.org/wiki/Windows-1252
1523
   *
1524
   * @param string $str <p>The input string</p>
1525
   *
1526
   * @return string
1527
   */
1528 29 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search / replace lists are derived once per request from the
    // "utf8_fix" data set and then reused for every call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // Load the mapping lazily the first time it is needed.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1549
1550
  /**
1551
   * Fix a double (or multiple) encoded UTF8 string.
1552
   *
1553
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1554
   *
1555
   * @return string|string[] <p>Will return the fixed input-"array" or
1556
   *                         the fixed input-"string".</p>
1557
   */
1558 1
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element, keys are kept.
    if (\is_array($str) === true) {
      foreach ($str as $key => $item) {
        $str[$key] = self::fix_utf8($item);
      }

      return $str;
    }

    // Decode and re-encode until a pass no longer changes the string.
    for ($previous = ''; $previous !== $str;) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1578
1579
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is flagged as available, its result is mapped to 'LTR' / 'RTL' first;
   * for every other result (or without "IntlChar") a built-in list of code point ranges is used.
   *
   * @param string $char <p>The character to check.</p>
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes taken from the "IntlChar"-Class
      if (\in_array($direction, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($direction, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of the overall window is treated as left-to-right
    if (!($c >= 0x5be && $c <= 0x10b7f)) {
      return 'LTR';
    }

    // each entry is either one single code point (int) or an inclusive [from, to] range
    if ($c <= 0x85e) {

      $rtlEntries = [
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          0x608,
          0x60b,
          0x60d,
          0x61b,
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          0x710,
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          0x7b1,
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          0x7fa,
          [0x800, 0x815],
          0x81a,
          0x824,
          0x828,
          [0x830, 0x83e],
          [0x840, 0x858],
          0x85e,
      ];

    } elseif ($c === 0x200f) {

      return 'RTL';

    } elseif ($c >= 0xfb1d) {

      $rtlEntries = [
          0xfb1d,
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          0xfb3e,
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          0x10808,
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          0x1083c,
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          0x1093f,
          0x10a00,
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      return 'LTR';

    }

    foreach ($rtlEntries as $entry) {
      if (\is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1700
1701
  /**
   * Load a data set from "/data/*.php" (relative to the directory of this class).
   *
   * @param string $file <p>Name of the data file, without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file,
   *                               or false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1718
1719
  /**
   * Report which php-features were detected.
   *
   * @param string|null $key <p>Name of a single support-entry, or null for all entries.</p>
   *
   * @return mixed <p>The full support-"array", if $key === null<br>
   *               the stored value, if $key is set in the support-"array"<br>
   *               otherwise null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    // run the detection only once
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown keys fall back to null
    return self::$SUPPORT[$key] ?? null;
  }
1744
1745
  /**
   * Build a random string out of the given characters.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters to pick from.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The random string; empty if $possibleChars has a length of 0.</p>
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $maxlength = self::strlen($possibleChars, $encoding);

    // nothing to pick from
    if ($maxlength === 0) {
      return '';
    }

    $result = '';

    for ($count = 0; $count < $length; $count++) {
      try {
        $position = \random_int(0, $maxlength - 1);
      } catch (\Exception $e) {
        // fall back to the non-cryptographic generator if random_int() fails
        /** @noinspection RandomApiMigrationInspection */
        $position = \mt_rand(0, $maxlength - 1);
      }

      $result .= self::substr($possibleChars, $position, 1, $encoding);
    }

    return $result;
  }
1778
1779
  /**
   * Build a unique identifier from a random number, the session-id, the remote- and
   * server-address (if available in $_SERVER) and optional extra entropy.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string <p>The md5-hash, or the raw "uniqid()" result if $md5 is false.</p>
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // the parts are evaluated and joined from left to right
    $seed = \implode(
        '',
        [
            \mt_rand(),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ]
    );

    $identifier = \uniqid($seed, true);

    if (!$md5) {
      return $identifier;
    }

    return \md5($identifier . $seed);
  }
1801
1802
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The unchanged result of "UTF8::string_has_bom()".</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    // pure delegation, kept for backward compatibility only
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1817
1818
  /**
   * Check if the string contains at least one lower case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains a lower case character.</p>
   */
  public static function has_lowercase(string $str): bool
  {
    // anything, followed by one char of the POSIX class "lower"
    $pattern = '.*[[:lower:]]';

    return self::matchesPattern($str, $pattern);
  }
1829
1830
  /**
   * Check if the string contains at least one upper case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains an upper case character.</p>
   */
  public static function has_uppercase(string $str): bool
  {
    // anything, followed by one char of the POSIX class "upper"
    $pattern = '.*[[:upper:]]';

    return self::matchesPattern($str, $pattern);
  }
1841
1842
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted into a decimal number first and then handed over
   * to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1853
1854
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted notations are "U+xxxx", "\uxxxx" and plain "xxxx" with 4 to 6
   * hexadecimal digits (case-insensitive).
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // only real hex digits are allowed: with "[a-z0-9]" an input like "zzzz" was
    // accepted and silently converted to 0 (or to a partial value) by intval()
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1875
1876
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string   $str      <p>The input string.</p>
   * @param int|null $flags    [optional] <p>Passed through unchanged.</p>
   * @param string   $encoding [optional] <p>Passed through unchanged.</p>
   *
   * @return string <p>The unchanged result of "UTF8::html_entity_decode()".</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // pure delegation
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1891
1892
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // with "keep ASCII" the conversion starts at the first non-ASCII code point
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // The convmap is a list of [start, end, offset, mask] groups, so it must contain
      // a multiple of 4 elements: PHP >= 8.0 throws a ValueError otherwise, older
      // versions ignored the surplus fifth element.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without mbstring: encode char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1938
1939
  /**
1940
   * UTF-8 version of html_entity_decode()
1941
   *
1942
   * The reason we are not using html_entity_decode() by itself is because
1943
   * while it is not technically correct to leave out the semicolon
1944
   * at the end of an entity most browsers will still interpret the entity
1945
   * correctly. html_entity_decode() does not convert entities without
1946
   * semicolons, so we are left with our own little solution here. Bummer.
1947
   *
1948
   * Convert all HTML entities to their applicable characters
1949
   *
1950
   * INFO: opposite to UTF8::html_encode()
1951
   *
1952
   * @link http://php.net/manual/en/function.html-entity-decode.php
1953
   *
1954
   * @param string $str      <p>
1955
   *                         The input string.
1956
   *                         </p>
1957
   * @param int    $flags    [optional] <p>
1958
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1959
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1960
   *                         <table>
1961
   *                         Available <i>flags</i> constants
1962
   *                         <tr valign="top">
1963
   *                         <td>Constant Name</td>
1964
   *                         <td>Description</td>
1965
   *                         </tr>
1966
   *                         <tr valign="top">
1967
   *                         <td><b>ENT_COMPAT</b></td>
1968
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1969
   *                         </tr>
1970
   *                         <tr valign="top">
1971
   *                         <td><b>ENT_QUOTES</b></td>
1972
   *                         <td>Will convert both double and single quotes.</td>
1973
   *                         </tr>
1974
   *                         <tr valign="top">
1975
   *                         <td><b>ENT_NOQUOTES</b></td>
1976
   *                         <td>Will leave both double and single quotes unconverted.</td>
1977
   *                         </tr>
1978
   *                         <tr valign="top">
1979
   *                         <td><b>ENT_HTML401</b></td>
1980
   *                         <td>
1981
   *                         Handle code as HTML 4.01.
1982
   *                         </td>
1983
   *                         </tr>
1984
   *                         <tr valign="top">
1985
   *                         <td><b>ENT_XML1</b></td>
1986
   *                         <td>
1987
   *                         Handle code as XML 1.
1988
   *                         </td>
1989
   *                         </tr>
1990
   *                         <tr valign="top">
1991
   *                         <td><b>ENT_XHTML</b></td>
1992
   *                         <td>
1993
   *                         Handle code as XHTML.
1994
   *                         </td>
1995
   *                         </tr>
1996
   *                         <tr valign="top">
1997
   *                         <td><b>ENT_HTML5</b></td>
1998
   *                         <td>
1999
   *                         Handle code as HTML 5.
2000
   *                         </td>
2001
   *                         </tr>
2002
   *                         </table>
2003
   *                         </p>
2004
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
2005
   *
2006
   * @return string <p>The decoded string.</p>
2007
   */
2008 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // strings with less than four bytes are returned untouched
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // no "&" at all, or neither "&#" nor ";": nothing to decode
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // make sure the support-"array" is filled before it is read,
      // like the other methods of this class do
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // repeat until a pass does not change the string anymore (handles nested encodings)
    do {
      $str_compare = $str;

      // decimal entities, but keep encoded quotes as they are
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // add the missing semicolon to numeric entities, so that html_entity_decode() can handle them;
      // preg_replace() returns null on error, in that case the string is used as it is
      // (null would be a TypeError for html_entity_decode() in strict mode)
      $strWithSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str) ?? $str;

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $strWithSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2078
2079
  /**
2080
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2081
   *
2082
   * @link http://php.net/manual/en/function.htmlentities.php
2083
   *
2084
   * @param string $str           <p>
2085
   *                              The input string.
2086
   *                              </p>
2087
   * @param int    $flags         [optional] <p>
2088
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2089
   *                              invalid code unit sequences and the used document type. The default is
2090
   *                              ENT_COMPAT | ENT_HTML401.
2091
   *                              <table>
2092
   *                              Available <i>flags</i> constants
2093
   *                              <tr valign="top">
2094
   *                              <td>Constant Name</td>
2095
   *                              <td>Description</td>
2096
   *                              </tr>
2097
   *                              <tr valign="top">
2098
   *                              <td><b>ENT_COMPAT</b></td>
2099
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2100
   *                              </tr>
2101
   *                              <tr valign="top">
2102
   *                              <td><b>ENT_QUOTES</b></td>
2103
   *                              <td>Will convert both double and single quotes.</td>
2104
   *                              </tr>
2105
   *                              <tr valign="top">
2106
   *                              <td><b>ENT_NOQUOTES</b></td>
2107
   *                              <td>Will leave both double and single quotes unconverted.</td>
2108
   *                              </tr>
2109
   *                              <tr valign="top">
2110
   *                              <td><b>ENT_IGNORE</b></td>
2111
   *                              <td>
2112
   *                              Silently discard invalid code unit sequences instead of returning
2113
   *                              an empty string. Using this flag is discouraged as it
2114
   *                              may have security implications.
2115
   *                              </td>
2116
   *                              </tr>
2117
   *                              <tr valign="top">
2118
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2119
   *                              <td>
2120
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2121
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2122
   *                              </td>
2123
   *                              </tr>
2124
   *                              <tr valign="top">
2125
   *                              <td><b>ENT_DISALLOWED</b></td>
2126
   *                              <td>
2127
   *                              Replace invalid code points for the given document type with a
2128
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2129
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2130
   *                              instance, to ensure the well-formedness of XML documents with
2131
   *                              embedded external content.
2132
   *                              </td>
2133
   *                              </tr>
2134
   *                              <tr valign="top">
2135
   *                              <td><b>ENT_HTML401</b></td>
2136
   *                              <td>
2137
   *                              Handle code as HTML 4.01.
2138
   *                              </td>
2139
   *                              </tr>
2140
   *                              <tr valign="top">
2141
   *                              <td><b>ENT_XML1</b></td>
2142
   *                              <td>
2143
   *                              Handle code as XML 1.
2144
   *                              </td>
2145
   *                              </tr>
2146
   *                              <tr valign="top">
2147
   *                              <td><b>ENT_XHTML</b></td>
2148
   *                              <td>
2149
   *                              Handle code as XHTML.
2150
   *                              </td>
2151
   *                              </tr>
2152
   *                              <tr valign="top">
2153
   *                              <td><b>ENT_HTML5</b></td>
2154
   *                              <td>
2155
   *                              Handle code as HTML 5.
2156
   *                              </td>
2157
   *                              </tr>
2158
   *                              </table>
2159
   *                              </p>
2160
   * @param string $encoding      [optional] <p>
2161
   *                              Like <b>htmlspecialchars</b>,
2162
   *                              <b>htmlentities</b> takes an optional third argument
2163
   *                              <i>encoding</i> which defines encoding used in
2164
   *                              conversion.
2165
   *                              Although this argument is technically optional, you are highly
2166
   *                              encouraged to specify the correct value for your code.
2167
   *                              </p>
2168
   * @param bool   $double_encode [optional] <p>
2169
   *                              When <i>double_encode</i> is turned off PHP will not
2170
   *                              encode existing html entities. The default is to convert everything.
2171
   *                              </p>
2172
   *
2173
   *
2174
   * @return string the encoded string.
2175
   * </p>
2176
   * <p>
2177
   * If the input <i>string</i> contains an invalid code unit
2178
   * sequence within the given <i>encoding</i> an empty string
2179
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2180
   * <b>ENT_SUBSTITUTE</b> flags are set.
2181
   */
2182 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native htmlentities() does not convert a backslash into its html entity,
     * so this is done here: every backslash is replaced by "&#92;".
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // finally convert the remaining chars into numbered entities, but keep the ASCII chars
    return self::html_encode($encoded, true, $encoding);
  }
2202
2203
  /**
2204
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2205
   *
2206
   * INFO: Take a look at "UTF8::htmlentities()"
2207
   *
2208
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2209
   *
2210
   * @param string $str           <p>
2211
   *                              The string being converted.
2212
   *                              </p>
2213
   * @param int    $flags         [optional] <p>
2214
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2215
   *                              invalid code unit sequences and the used document type. The default is
2216
   *                              ENT_COMPAT | ENT_HTML401.
2217
   *                              <table>
2218
   *                              Available <i>flags</i> constants
2219
   *                              <tr valign="top">
2220
   *                              <td>Constant Name</td>
2221
   *                              <td>Description</td>
2222
   *                              </tr>
2223
   *                              <tr valign="top">
2224
   *                              <td><b>ENT_COMPAT</b></td>
2225
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2226
   *                              </tr>
2227
   *                              <tr valign="top">
2228
   *                              <td><b>ENT_QUOTES</b></td>
2229
   *                              <td>Will convert both double and single quotes.</td>
2230
   *                              </tr>
2231
   *                              <tr valign="top">
2232
   *                              <td><b>ENT_NOQUOTES</b></td>
2233
   *                              <td>Will leave both double and single quotes unconverted.</td>
2234
   *                              </tr>
2235
   *                              <tr valign="top">
2236
   *                              <td><b>ENT_IGNORE</b></td>
2237
   *                              <td>
2238
   *                              Silently discard invalid code unit sequences instead of returning
2239
   *                              an empty string. Using this flag is discouraged as it
2240
   *                              may have security implications.
2241
   *                              </td>
2242
   *                              </tr>
2243
   *                              <tr valign="top">
2244
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2245
   *                              <td>
2246
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2247
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2248
   *                              </td>
2249
   *                              </tr>
2250
   *                              <tr valign="top">
2251
   *                              <td><b>ENT_DISALLOWED</b></td>
2252
   *                              <td>
2253
   *                              Replace invalid code points for the given document type with a
2254
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2255
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2256
   *                              instance, to ensure the well-formedness of XML documents with
2257
   *                              embedded external content.
2258
   *                              </td>
2259
   *                              </tr>
2260
   *                              <tr valign="top">
2261
   *                              <td><b>ENT_HTML401</b></td>
2262
   *                              <td>
2263
   *                              Handle code as HTML 4.01.
2264
   *                              </td>
2265
   *                              </tr>
2266
   *                              <tr valign="top">
2267
   *                              <td><b>ENT_XML1</b></td>
2268
   *                              <td>
2269
   *                              Handle code as XML 1.
2270
   *                              </td>
2271
   *                              </tr>
2272
   *                              <tr valign="top">
2273
   *                              <td><b>ENT_XHTML</b></td>
2274
   *                              <td>
2275
   *                              Handle code as XHTML.
2276
   *                              </td>
2277
   *                              </tr>
2278
   *                              <tr valign="top">
2279
   *                              <td><b>ENT_HTML5</b></td>
2280
   *                              <td>
2281
   *                              Handle code as HTML 5.
2282
   *                              </td>
2283
   *                              </tr>
2284
   *                              </table>
2285
   *                              </p>
2286
   * @param string $encoding      [optional] <p>
2287
   *                              Defines encoding used in conversion.
2288
   *                              </p>
2289
   *                              <p>
2290
   *                              For the purposes of this function, the encodings
2291
   *                              ISO-8859-1, ISO-8859-15,
2292
   *                              UTF-8, cp866,
2293
   *                              cp1251, cp1252, and
2294
   *                              KOI8-R are effectively equivalent, provided the
2295
   *                              <i>string</i> itself is valid for the encoding, as
2296
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2297
   *                              the same positions in all of these encodings.
2298
   *                              </p>
2299
   * @param bool   $double_encode [optional] <p>
2300
   *                              When <i>double_encode</i> is turned off PHP will not
2301
   *                              encode existing html entities, the default is to convert everything.
2302
   *                              </p>
2303
   *
2304
   * @return string The converted string.
2305
   * </p>
2306
   * <p>
2307
   * If the input <i>string</i> contains an invalid code unit
2308
   * sequence within the given <i>encoding</i> an empty string
2309
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2310
   * <b>ENT_SUBSTITUTE</b> flags are set.
2311
   */
2312 1 View Code Duplication
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2320
2321
  /**
2322
   * Checks whether iconv is available on the server.
2323
   *
2324
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2325
   */
2326 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so it is returned as-is.
    return \extension_loaded('iconv');
  }
2330
2331
  /**
2332
   * alias for "UTF8::decimal_to_chr()"
2333
   *
2334
   * @see UTF8::decimal_to_chr()
2335
   *
2336
   * @param mixed $int
2337
   *
2338
   * @return string
2339
   */
2340 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2344
2345
  /**
2346
   * Converts Integer to hexadecimal U+xxxx code point representation.
2347
   *
2348
   * INFO: opposite to UTF8::hex_to_int()
2349
   *
2350
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2351
   * @param string $pfix [optional]
2352
   *
2353
   * @return string <p>The code point, or empty string on failure.</p>
2354
   */
2355 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad the hexadecimal digits with zeros up to a width of four;
    // values that already need four or more digits are kept unchanged.
    return $pfix . \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);
  }
2363
2364
  /**
2365
   * Checks whether intl-char is available on the server.
2366
   *
2367
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2368
   */
2369 1
  public static function intlChar_loaded(): bool
  {
    // Availability is detected through the presence of the IntlChar class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2373
2374
  /**
2375
   * Checks whether intl is available on the server.
2376
   *
2377
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2378
   */
2379 4
  public static function intl_loaded(): bool
  {
    // Availability is detected through the loaded-extension list.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2383
2384
  /**
2385
   * alias for "UTF8::is_ascii()"
2386
   *
2387
   * @see        UTF8::is_ascii()
2388
   *
2389
   * @param string $str
2390
   *
2391
   * @return boolean
2392
   *
2393
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2394
   */
2395 1
  public static function isAscii(string $str): bool
  {
    // Deprecated alias: the check itself is done by is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2399
2400
  /**
2401
   * alias for "UTF8::is_base64()"
2402
   *
2403
   * @see        UTF8::is_base64()
2404
   *
2405
   * @param string $str
2406
   *
2407
   * @return bool
2408
   *
2409
   * @deprecated <p>use "UTF8::is_base64()"</p>
2410
   */
2411 1
  public static function isBase64(string $str): bool
  {
    // Deprecated alias: the check itself is done by is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2415
2416
  /**
2417
   * alias for "UTF8::is_binary()"
2418
   *
2419
   * @see        UTF8::is_binary()
2420
   *
2421
   * @param mixed $str
2422
   * @param bool  $strict
2423
   *
2424
   * @return bool
2425
   *
2426
   * @deprecated <p>use "UTF8::is_binary()"</p>
2427
   */
2428 2
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated alias: both arguments are forwarded unchanged to is_binary().
    $isBinary = self::is_binary($str, $strict);

    return $isBinary;
  }
2432
2433
  /**
2434
   * alias for "UTF8::is_bom()"
2435
   *
2436
   * @see        UTF8::is_bom()
2437
   *
2438
   * @param string $utf8_chr
2439
   *
2440
   * @return boolean
2441
   *
2442
   * @deprecated <p>use "UTF8::is_bom()"</p>
2443
   */
2444 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated alias: the check itself is done by is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2448
2449
  /**
2450
   * alias for "UTF8::is_html()"
2451
   *
2452
   * @see        UTF8::is_html()
2453
   *
2454
   * @param string $str
2455
   *
2456
   * @return boolean
2457
   *
2458
   * @deprecated <p>use "UTF8::is_html()"</p>
2459
   */
2460 1
  public static function isHtml(string $str): bool
  {
    // Deprecated alias: the check itself is done by is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2464
2465
  /**
2466
   * alias for "UTF8::is_json()"
2467
   *
2468
   * @see        UTF8::is_json()
2469
   *
2470
   * @param string $str
2471
   *
2472
   * @return bool
2473
   *
2474
   * @deprecated <p>use "UTF8::is_json()"</p>
2475
   */
2476
  public static function isJson(string $str): bool
  {
    // Deprecated alias: the check itself is done by is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
2480
2481
  /**
2482
   * alias for "UTF8::is_utf16()"
2483
   *
2484
   * @see        UTF8::is_utf16()
2485
   *
2486
   * @param string $str
2487
   *
2488
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2489
   *
2490
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2491
   */
2492 1
  public static function isUtf16(string $str)
  {
    // Deprecated alias: the result of is_utf16() is passed through untouched.
    $byteOrder = self::is_utf16($str);

    return $byteOrder;
  }
2496
2497
  /**
2498
   * alias for "UTF8::is_utf32()"
2499
   *
2500
   * @see        UTF8::is_utf32()
2501
   *
2502
   * @param string $str
2503
   *
2504
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2505
   *
2506
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2507
   */
2508 1
  public static function isUtf32(string $str)
  {
    // Deprecated alias: the result of is_utf32() is passed through untouched.
    $byteOrder = self::is_utf32($str);

    return $byteOrder;
  }
2512
2513
  /**
2514
   * alias for "UTF8::is_utf8()"
2515
   *
2516
   * @see        UTF8::is_utf8()
2517
   *
2518
   * @param string $str
2519
   * @param bool   $strict
2520
   *
2521
   * @return bool
2522
   *
2523
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2524
   */
2525 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated alias: both arguments are forwarded unchanged to is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2529
2530
  /**
2531
   * Returns true if the string contains only alphabetic chars, false otherwise.
2532
   *
2533
   * @param string $str
2534
   *
2535
   * @return bool <p>Whether or not $str contains only alphabetic chars.</p>
2536
   */
2537
  public static function is_alpha(string $str): bool
  {
    // Whole-string pattern: zero or more characters of the POSIX "alpha" class.
    $alphaOnly = '^[[:alpha:]]*$';

    return self::matchesPattern($str, $alphaOnly);
  }
2541
2542
  /**
2543
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2544
   *
2545
   * @param string $str
2546
   *
2547
   * @return bool <p>Whether or not $str contains only alphanumeric chars.</p>
2548
   */
2549
  public static function is_alphanumeric(string $str): bool
  {
    // Whole-string pattern: zero or more characters of the POSIX "alnum" class.
    $alnumOnly = '^[[:alnum:]]*$';

    return self::matchesPattern($str, $alnumOnly);
  }
2553
2554
  /**
2555
   * Checks if a string is 7 bit ASCII.
2556
   *
2557
   * @param string $str <p>The string to check.</p>
2558
   *
2559
   * @return bool <p>
2560
   *              <strong>true</strong> if it is ASCII<br>
2561
   *              <strong>false</strong> otherwise
2562
   *              </p>
2563
   */
2564 58
  public static function is_ascii(string $str): bool
  {
    // Nothing to inspect: an empty string is reported as ASCII.
    if ($str === '') {
      return true;
    }

    // Search for a byte outside the accepted set (the listed control bytes plus 0x20-0x7E).
    // NOTE(review): \x10 and \x13 may have been meant as decimal 10/13 (LF/CR, already
    // listed as \x0A/\x0D) - confirm before changing, callers may rely on the current set.
    $hasForeignByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForeignByte;
  }
2572
2573
  /**
2574
   * Returns true if the string is base64 encoded, false otherwise.
2575
   *
2576
   * @param string $str <p>The input string.</p>
2577
   *
2578
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2579
   */
2580 1
  public static function is_base64(string $str): bool
  {
    // Strict mode makes base64_decode() return false as soon as the input
    // contains a character outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare against false / '' explicitly: a plain truthiness check would
    // wrongly reject payloads that decode to the string "0" (e.g. "MA==").
    // An empty payload is still reported as "not base64", as before.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Only canonical encodings count: re-encoding must reproduce the input exactly.
    return \base64_encode($decoded) === $str;
  }
2586
2587
  /**
2588
   * Check if the input is binary... (this looks like a hack).
2589
   *
2590
   * @param mixed $input
2591
   * @param bool  $strict
2592
   *
2593
   * @return bool
2594
   */
2595 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // Text made up solely of "0" and "1" characters is reported as binary.
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    // More than 30% NUL bytes is reported as binary right away.
    $nulCount = \substr_count($data, "\x0");
    if (($nulCount / \strlen($data)) > 0.3) {
      return true;
    }

    // Strict mode (when finfo exists) relies on the detected MIME encoding alone.
    if ($strict === true && \class_exists('finfo')) {
      $detected = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($data);

      return $detected === 'binary';
    }

    // Otherwise a single NUL byte is enough.
    return $nulCount > 0;
  }
2635
2636
  /**
2637
   * Check if the file is binary.
2638
   *
2639
   * @param string $file
2640
   *
2641
   * @return boolean
2642
   */
2643 3
  public static function is_binary_file($file): bool
  {
    // Fallback used whenever the file cannot be opened or read.
    $block = '';

    try {
      // fopen() signals failure by returning false (plus a warning), not by
      // throwing, so the handle has to be checked before it is used.
      $fp = \fopen($file, 'rb');
      if ($fp !== false) {
        try {
          // Only the first 512 bytes are inspected.
          $chunk = \fread($fp, 512);
          if ($chunk !== false) {
            $block = $chunk;
          }
        } finally {
          // Release the handle even when reading fails.
          \fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // Kept for environments whose error handler turns warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block, true);
  }
2655
2656
  /**
2657
   * Returns true if the string contains only whitespace chars, false otherwise.
2658
   *
2659
   * @param string $str
2660
   *
2661
   * @return bool <p>Whether or not $str contains only whitespace characters.</p>
2662
   */
2663
  public static function is_blank(string $str): bool
  {
    // Whole-string pattern: zero or more characters of the POSIX "space" class.
    $spaceOnly = '^[[:space:]]*$';

    return self::matchesPattern($str, $spaceOnly);
  }
2667
2668
  /**
2669
   * Checks if the given string is equal to any "Byte Order Mark".
2670
   *
2671
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2672
   *
2673
   * @param string $str <p>The input string.</p>
2674
   *
2675
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2676
   */
2677 1
  public static function is_bom($str): bool
  {
    // The BOM table is keyed by the BOM byte sequence; its values are not needed here.
    foreach (self::$BOM as $knownBom => $unusedLength) {
      if ($knownBom === $str) {
        return true;
      }
    }

    return false;
  }
2687
2688
  /**
2689
   * Determine whether the string is considered to be empty.
2690
   *
2691
   * A variable is considered empty if it does not exist or if its value equals FALSE.
2692
   * empty() does not generate a warning if the variable does not exist.
2693
   *
2694
   * @param mixed $str
2695
   *
2696
   * @return bool <p>Whether or not $str is empty().</p>
2697
   */
2698
  public static function is_empty($str): bool
  {
    // The parameter always exists, so empty() reduces to "converts to false".
    return (bool)$str === false;
  }
2702
2703
  /**
2704
   * Returns true if the string contains only hexadecimal chars, false otherwise.
2705
   *
2706
   * @param string $str
2707
   *
2708
   * @return bool <p>Whether or not $str contains only hexadecimal chars.</p>
2709
   */
2710
  public static function is_hexadecimal(string $str): bool
  {
    // Whole-string pattern: zero or more characters of the POSIX "xdigit" class.
    $hexOnly = '^[[:xdigit:]]*$';

    return self::matchesPattern($str, $hexOnly);
  }
2714
2715
  /**
2716
   * Check if the string contains any html-tags <lall>.
2717
   *
2718
   * @param string $str <p>The input string.</p>
2719
   *
2720
   * @return boolean
2721
   */
2722 1
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, with optional attributes.
    $found = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    return \count($found) !== 0;
  }
2735
2736
  /**
2737
   * Try to check if "$str" is an json-string.
2738
   *
2739
   * @param string $str <p>The input string.</p>
2740
   *
2741
   * @return bool
2742
   */
2743 1
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only documents that decode to an object or an array are accepted.
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    return \json_last_error() === JSON_ERROR_NONE;
  }
2759
2760
  /**
2761
   * @param string $str
2762
   *
2763
   * @return bool
2764
   */
2765
  public static function is_lowercase(string $str): bool
  {
    // Whole-string pattern: zero or more characters of the POSIX "lower" class.
    $lowerOnly = '^[[:lower:]]*$';

    return (bool)self::matchesPattern($str, $lowerOnly);
  }
2773
2774
  /**
2775
   * Returns true if the string is serialized, false otherwise.
2776
   *
2777
   * @param string $str
2778
   *
2779
   * @return bool <p>Whether or not $str is serialized.</p>
2780
   */
2781
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, so that one payload is recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // SECURITY: the input may be untrusted. "allowed_classes => false" prevents any
    // class from being instantiated while probing (objects come back as
    // __PHP_Incomplete_Class); the boolean outcome of this check is unchanged.
    // The silence operator is deliberate: a failed unserialize() emits a notice/warning.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2793
2794
  /**
2795
   * Returns true if the string contains only upper case chars, false
2796
   * otherwise.
2797
   *
2798
   * @param string $str <p>The input string.</p>
2799
   *
2800
   * @return bool <p>Whether or not $str contains only upper case characters.</p>
2801
   */
2802
  public static function is_uppercase(string $str): bool
  {
    // Whole-string pattern: zero or more characters of the POSIX "upper" class.
    $upperOnly = '^[[:upper:]]*$';

    return self::matchesPattern($str, $upperOnly);
  }
2806
2807
  /**
2808
   * Check if the string is UTF-16.
2809
   *
2810
   * @param string $str <p>The input string.</p>
2811
   *
2812
   * @return int|false <p>
2813
   *                   <strong>false</strong> if it's not UTF-16,<br>
2814
   *                   <strong>1</strong> for UTF-16LE,<br>
2815
   *                   <strong>2</strong> for UTF-16BE.
2816
   *                   </p>
2817
   */
2818 10 View Code Duplication
  public static function is_utf16(string $str)
  {
    // Only input that is_binary() flags is examined further.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Result of count_chars() for the input; filled on the first successful round trip.
    $inputChars = [];

    // One score per byte order, evaluated in the order LE, BE.
    $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Convert there and back again; the candidate is scored only when the
      // second conversion reproduces the first one exactly.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      // Score one point per key of the round-trip statistics found in $inputChars.
      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (\in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-16".
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
2873
2874
  /**
2875
   * Check if the string is UTF-32.
2876
   *
2877
   * @param string $str
2878
   *
2879
   * @return int|false <p>
2880
   *                   <strong>false</strong> if it's not UTF-32,<br>
2881
   *                   <strong>1</strong> for UTF-32LE,<br>
2882
   *                   <strong>2</strong> for UTF-32BE.
2883
   *                   </p>
2884
   */
2885 8 View Code Duplication
  public static function is_utf32(string $str)
  {
    // Only input that is_binary() flags is examined further.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Result of count_chars() for the input; filled on the first successful round trip.
    $inputChars = [];

    // One score per byte order, evaluated in the order LE, BE.
    $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Convert there and back again; the candidate is scored only when the
      // second conversion reproduces the first one exactly.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      // Score one point per key of the round-trip statistics found in $inputChars.
      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (\in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-32".
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
2940
2941
  /**
2942
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2943
   *
2944
   * @see    http://hsivonen.iki.fi/php-utf8/
2945
   *
2946
   * @param string|string[] $str    <p>The string to be checked.</p>
2947
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2948
   *
2949
   * @return bool
2950
   */
2951 61
  public static function is_utf8($str, bool $strict = false): bool
  {
    // An array is valid only when every one of its elements is valid.
    if (\is_array($str) === true) {
      foreach ($str as $item) {
        if (self::is_utf8($item, $strict) === false) {
          return false;
        }
      }

      return true;
    }

    // Empty input is reported as valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode additionally rejects input that is detected as UTF-16 / UTF-32.
    if (
        $strict === true
        &&
        (self::is_utf16($str) !== false || self::is_utf32($str) !== false)
    ) {
      return false;
    }

    // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT true,
    // although it relies on the /u modifier - confirm the intended condition.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched when the /u modifier
      // is used, the string is treated as valid UTF-8.
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $pending = 0;   // continuation octets still expected for the current sequence
    $codePoint = 0; // code point assembled so far
    $seqLength = 1; // announced length (in octets) of the current sequence

    $byteCount = self::strlen_in_byte($str);
    for ($pos = 0; $pos < $byteCount; ++$pos) {
      $octet = self::$ORD[$str[$pos]];

      if ($pending === 0) {
        // Start of a character: either US-ASCII or the lead octet of a sequence.
        if (($octet & 0x80) === 0) {
          // US-ASCII, pass straight through.
          $seqLength = 1;
        } elseif (($octet & 0xE0) === 0xC0) {
          // Lead octet of a 2 octet sequence.
          $codePoint = ($octet & 0x1F) << 6;
          $pending = 1;
          $seqLength = 2;
        } elseif (($octet & 0xF0) === 0xE0) {
          // Lead octet of a 3 octet sequence.
          $codePoint = ($octet & 0x0F) << 12;
          $pending = 2;
          $seqLength = 3;
        } elseif (($octet & 0xF8) === 0xF0) {
          // Lead octet of a 4 octet sequence.
          $codePoint = ($octet & 0x07) << 18;
          $pending = 3;
          $seqLength = 4;
        } elseif (($octet & 0xFC) === 0xF8) {
          // Lead octet of a 5 octet sequence. Always illegal, but instead of
          // resynchronizing we read on to the end of the sequence and let the
          // length check below reject it.
          $codePoint = ($octet & 0x03) << 24;
          $pending = 4;
          $seqLength = 5;
        } elseif (($octet & 0xFE) === 0xFC) {
          // Lead octet of a 6 octet sequence, handled like the 5 octet case.
          $codePoint = ($octet & 1) << 30;
          $pending = 5;
          $seqLength = 6;
        } else {
          // Neither US-ASCII nor a legal lead octet.
          return false;
        }

        continue;
      }

      // Inside a sequence: anything but a continuation octet (10xxxxxx) is an error.
      if (($octet & 0xC0) !== 0x80) {
        return false;
      }

      // Merge the six payload bits into their position within the code point.
      $codePoint |= ($octet & 0x0000003F) << (($pending - 1) * 6);

      if (--$pending !== 0) {
        continue;
      }

      // The sequence is complete: check for illegal sequences and code points.
      if (
          // From Unicode 3.1, non-shortest form is illegal.
          (2 === $seqLength && $codePoint < 0x0080) ||
          (3 === $seqLength && $codePoint < 0x0800) ||
          (4 === $seqLength && $codePoint < 0x10000) ||
          (4 < $seqLength) ||
          // From Unicode 3.2, surrogate characters are illegal.
          (($codePoint & 0xFFFFF800) === 0xD800) ||
          // Code points outside the Unicode range are illegal.
          ($codePoint > 0x10FFFF)
      ) {
        return false;
      }

      // Reset the state for the next character.
      $pending = 0;
      $codePoint = 0;
      $seqLength = 1;
    }

    // Input that stops in the middle of a multi-octet sequence (e.g. a lone
    // lead octet at the very end) is not valid UTF-8.
    return $pending === 0;
  }
3099
3100
  /**
3101
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3102
   * Decodes a JSON string
3103
   *
3104
   * @link http://php.net/manual/en/function.json-decode.php
3105
   *
3106
   * @param string $json    <p>
3107
   *                        The <i>json</i> string being decoded.
3108
   *                        </p>
3109
   *                        <p>
3110
   *                        This function only works with UTF-8 encoded strings.
3111
   *                        </p>
3112
   *                        <p>PHP implements a superset of
3113
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3114
   *                        only supports these values when they are nested inside an array or an object.
3115
   *                        </p>
3116
   * @param bool   $assoc   [optional] <p>
3117
   *                        When <b>TRUE</b>, returned objects will be converted into
3118
   *                        associative arrays.
3119
   *                        </p>
3120
   * @param int    $depth   [optional] <p>
3121
   *                        User specified recursion depth.
3122
   *                        </p>
3123
   * @param int    $options [optional] <p>
3124
   *                        Bitmask of JSON decode options. Currently only
3125
   *                        <b>JSON_BIGINT_AS_STRING</b>
3126
   *                        is supported (default is to cast large integers as floats)
3127
   *                        </p>
3128
   *
3129
   * @return mixed the value encoded in <i>json</i> in appropriate
3130
   * PHP type. Values true, false and
3131
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3132
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3133
   * <i>json</i> cannot be decoded or if the encoded
3134
   * data is deeper than the recursion limit.
3135
   */
3136 2
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // The input goes through filter() first; PHP's decoder then does the parsing.
    return \json_decode(self::filter($json), $assoc, $depth, $options);
  }
3144
3145
  /**
3146
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3147
   * Returns the JSON representation of a value.
3148
   *
3149
   * @link http://php.net/manual/en/function.json-encode.php
3150
   *
3151
   * @param mixed $value   <p>
3152
   *                       The <i>value</i> being encoded. Can be any type except
3153
   *                       a resource.
3154
   *                       </p>
3155
   *                       <p>
3156
   *                       All string data must be UTF-8 encoded.
3157
   *                       </p>
3158
   *                       <p>PHP implements a superset of
3159
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3160
   *                       only supports these values when they are nested inside an array or an object.
3161
   *                       </p>
3162
   * @param int   $options [optional] <p>
3163
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3164
   *                       <b>JSON_HEX_TAG</b>,
3165
   *                       <b>JSON_HEX_AMP</b>,
3166
   *                       <b>JSON_HEX_APOS</b>,
3167
   *                       <b>JSON_NUMERIC_CHECK</b>,
3168
   *                       <b>JSON_PRETTY_PRINT</b>,
3169
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3170
   *                       <b>JSON_FORCE_OBJECT</b>,
3171
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3172
   *                       constants is described on
3173
   *                       the JSON constants page.
3174
   *                       </p>
3175
   * @param int   $depth   [optional] <p>
3176
   *                       Set the maximum depth. Must be greater than zero.
3177
   *                       </p>
3178
   *
3179
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3180
   */
3181 2
  public static function json_encode($value, int $options = 0, int $depth = 512): string
  {
    // The value goes through filter() first; PHP's encoder then does the work.
    // NOTE(review): \json_encode() yields false on failure, which does not satisfy
    // the declared string return type - confirm how callers expect failures to surface.
    return \json_encode(self::filter($value), $options, $depth);
  }
3189
3190
  /**
3191
   * Returns the last $n characters of the string.
3192
   *
3193
   * @param string $str      <p>The input string.</p>
3194
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
3195
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3196
   *
3197
   * @return string
3198
   */
3199
  public static function last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Asking for zero or a negative number of characters yields nothing.
    if ($n <= 0) {
      return '';
    }

    // A negative offset counts from the end of the string. lcfirst() treats
    // self::substr() as possibly returning false, so the result is cast here as
    // well to honour the declared string return type.
    return (string)self::substr($str, -$n, null, $encoding);
  }
3207
3208
  /**
3209
   * Makes string's first char lowercase.
3210
   *
3211
   * @param string $str       <p>The input string</p>
3212
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
3213
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3214
   *
3215
   * @return string <p>The resulting string</p>
3216
   */
3217 7
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character stays as it is.
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // The first character is lower-cased on its own.
    $first = self::strtolower(
        (string)self::substr($str, 0, 1, $encoding, $cleanUtf8),
        $encoding,
        $cleanUtf8
    );

    // substr() may report false; that case is treated as an empty remainder.
    return $first . ($rest === false ? '' : $rest);
  }
3232
3233
  /**
3234
   * alias for "UTF8::lcfirst()"
3235
   *
3236
   * @see UTF8::lcfirst()
3237
   *
3238
   * @param string $word
3239
   * @param string $encoding
3240
   * @param bool   $cleanUtf8
3241
   *
3242
   * @return string
3243
   */
3244 1
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3248
3249
  /**
3250
   * Lowercase for all words in the string.
3251
   *
3252
   * @param string   $str        <p>The input string.</p>
3253
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3254
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
3255
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3256
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3257
   *
3258
   * @return string
3259
   */
3260 1
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a truthiness check would also discard the string "0".
    if ($str === '') {
      return '';
    }

    $useExceptions = $exceptions !== [];
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip empty chunks only; "0" is a legitimate word and must stay in the output.
      if ((string)$word === '') {
        continue;
      }

      // Words listed in $exceptions are passed through untouched.
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The chunks are glued back together without any separator.
    return \implode('', $newWords);
  }
3298
3299
  /**
3300
   * Returns the longest common prefix between the string and $otherStr.
3301
   *
3302
   * @param string $str      <p>The input string.</p>
3303
   * @param string $otherStr <p>Second string for comparison.</p>
3304
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3305
   *
3306
   * @return string
3307
   */
3308
  public static function longestCommonPrefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The prefix can never be longer than the shorter of the two inputs.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $pos = 0;
    while ($pos < $limit) {
      $current = self::substr($str, $pos, 1, $encoding);

      // Stop at the first position where both strings differ.
      if ($current != self::substr($otherStr, $pos, 1, $encoding)) {
        break;
      }

      $prefix .= $current;
      $pos++;
    }

    return $prefix;
  }
3325
3326
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if there is none.</p>
   */
  public static function longestCommonSubstring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength == 0 || $otherLength == 0) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per cell of the table.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // position in $str (1-based) right where that match ends
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // The character of $str is the same for the whole row, so fetch it only once.
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar == $otherChars[$j]) {
          // Extend the common run that ended at the previous characters of both strings.
          $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

          // Strictly greater: on ties the earliest match wins.
          if ($table[$i][$j] > $len) {
            $len = $table[$i][$j];
            $end = $i;
          }
        } else {
          $table[$i][$j] = 0;
        }
      }
    }

    // No character in common at all.
    if ($len === 0) {
      return '';
    }

    // self::substr() may return false; the cast protects the declared string return type.
    return (string)self::substr(
        $str,
        $end - $len,
        $len,
        $encoding
    );
  }
3380
3381
  /**
3382
   * Returns the longest common suffix between the string and $otherStr.
3383
   *
3384
   * @param string $str
3385
   * @param string $otherStr <p>Second string for comparison.</p>
3386
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3387
   *
3388
   * @return string
3389
   */
3390
  public static function longestCommonSuffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The suffix can never be longer than the shorter of the two inputs.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $offset = 1;
    while ($offset <= $limit) {
      // Negative offsets walk both strings backwards from their ends.
      $current = self::substr($str, -$offset, 1, $encoding);

      // Stop as soon as the characters differ.
      if ($current != self::substr($otherStr, -$offset, 1, $encoding)) {
        break;
      }

      $suffix = $current . $suffix;
      $offset++;
    }

    return $suffix;
  }
3407
3408
  /**
3409
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3410
   *
3411
   * @param string $str   <p>The string to be trimmed</p>
3412
   * @param mixed  $chars <p>Optional characters to be stripped</p>
3413
   *
3414
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3415
   */
3416 8 View Code Duplication
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Use the default character set only when no characters were really given.
    // A plain truthiness test would also catch "0" and silently ignore a request to strip zeros.
    $useDefault = $chars === INF
                  || $chars === null
                  || $chars === false
                  || $chars === ''
                  || $chars === [];

    if ($useDefault) {
      // Unicode separators (\pZ) and "other"/control characters (\pC).
      // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
      $pattern = '^[\pZ\pC]+';
    } else {
      // Quote the characters so they are taken literally inside the character class.
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
3432
3433
  /**
3434
   * Returns true if $str matches the supplied pattern, false otherwise.
3435
   *
3436
   * @param string $str     <p>The input string.</p>
3437
   * @param string $pattern <p>Regex pattern to match against.</p>
3438
   *
3439
   * @return bool <p>Whether or not $str matches the pattern.</p>
3440
   */
3441
  public static function matchesPattern(string $str, string $pattern): bool
  {
    // The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
    $regex = '/' . $pattern . '/u';

    // preg_match() yields 1 only for a hit; 0 (no match) and false (error) both count as "no".
    return \preg_match($regex, $str) === 1;
  }
3449
3450
  /**
3451
   * Returns the UTF-8 character with the maximum code point in the given data.
3452
   *
3453
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3454
   *
3455
   * @return string <p>The character with the highest code point than others.</p>
3456
   */
3457 1 View Code Duplication
  public static function max($arg): string
  {
    // An array of strings is treated as one concatenated string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() cannot work on an empty list (presumably what empty input produces),
    // so there is no character to report in that case.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
3465
3466
  /**
3467
   * Calculates and returns the maximum number of bytes taken by any
3468
   * UTF-8 encoded character in the given string.
3469
   *
3470
   * @param string $str <p>The original Unicode string.</p>
3471
   *
3472
   * @return int <p>Max byte lengths of the given chars.</p>
3473
   */
3474 1
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    // Nothing in the list means there is no width to report.
    if (\count($sizes) === 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
3483
3484
  /**
3485
   * Checks whether mbstring is available on the server.
3486
   *
3487
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3488
   */
3489 12
  public static function mbstring_loaded(): bool
  {
    $loaded = \extension_loaded('mbstring');

    // Side effect: whenever the extension is present, its internal encoding is switched to UTF-8.
    if ($loaded) {
      \mb_internal_encoding('UTF-8');
    }

    return $loaded;
  }
3499
3500 1
  private static function mbstring_overloaded(): bool
  {
    // Without the constant there is nothing to test the INI value against.
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /**
     * @noinspection PhpUsageOfSilenceOperatorInspection
     *
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */
    $overloadFlags = @\ini_get('mbstring.func_overload');

    // True only when the string-overload bit is set in the configured value.
    return (bool)($overloadFlags & MB_OVERLOAD_STRING);
  }
3511
3512
  /**
3513
   * Returns the UTF-8 character with the minimum code point in the given data.
3514
   *
3515
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3516
   *
3517
   * @return string <p>The character with the lowest code point than others.</p>
3518
   */
3519 1 View Code Duplication
  public static function min($arg): string
  {
    // An array of strings is treated as one concatenated string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() cannot work on an empty list (presumably what empty input produces),
    // so there is no character to report in that case.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
3527
3528
  /**
3529
   * alias for "UTF8::normalize_encoding()"
3530
   *
3531
   * @see        UTF8::normalize_encoding()
3532
   *
3533
   * @param string $encoding
3534
   * @param mixed  $fallback
3535
   *
3536
   * @return mixed
3537
   *
3538
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3539
   */
3540 1
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    // Deprecated alias: forwards both arguments to normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3544
3545
  /**
3546
   * Normalize the encoding-"name" input.
3547
   *
3548
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3549
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3550
   *
3551
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
3552
   *                default)</p>
3553
   */
3554 27
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    // Per-request memo: requested name => normalized name.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // Nothing usable given: hand back whatever the caller chose as fallback.
    if (!$encoding) {
      return $fallback;
    }

    // Fast path for the by far most common names.
    if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // The list of known encoding names is loaded only on first use.
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // A name that is already in the known list is returned untouched.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    // Aliases are looked up by the upper-cased name with everything
    // except letters, digits and whitespace removed.
    $requested = $encoding;
    $normalized = \strtoupper($encoding);
    $lookupKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Unknown aliases fall through as the plain upper-cased name.
    $normalized = $equivalences[$lookupKey] ?? $normalized;

    // Memoize under the name exactly as it was requested.
    $STATIC_NORMALIZE_ENCODING_CACHE[$requested] = $normalized;

    return $normalized;
  }
3662
3663
  /**
3664
   * Normalize some MS Word special characters.
3665
   *
3666
   * @param string $str <p>The string to be normalized.</p>
3667
   *
3668
   * @return string
3669
   */
3670 16 View Code Duplication
  public static function normalize_msword(string $str): string
  {
    // Search/replace lists, derived once from the lookup table and then reused.
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($str === '') {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {
      // The table itself is loaded only when it is needed for the first time.
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $table = self::$UTF8_MSWORD;
      $UTF8_MSWORD_KEYS_CACHE = \array_keys($table);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($table);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3691
3692
  /**
3693
   * Normalize the whitespace.
3694
   *
3695
   * @param string $str                     <p>The string to be normalized.</p>
3696
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3697
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3698
   *                                        bidirectional text chars.</p>
3699
   *
3700
   * @return string
3701
   */
3702 39
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // One prepared search list per value of $keepNonBreakingSpace (key 0 or 1).
    static $WHITESPACE_CACHE = [];
    // Prepared list of the bidirectional control characters.
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ($str === '') {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      // Leaving the non-breaking space out of the list keeps it in the string.
      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // Bidi controls are removed outright, not turned into spaces.
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // Every remaining whitespace variant becomes a plain space.
    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3734
3735
  /**
3736
   * Calculates Unicode code point of the given UTF-8 encoded character.
3737
   *
3738
   * INFO: opposite to UTF8::chr()
3739
   *
3740
   * @param string $chr      <p>The character of which to calculate code point.</p>
3741
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3742
   *
3743
   * @return int <p>
3744
   *             Unicode code point of the given character,<br>
3745
   *             0 on invalid UTF-8 byte sequence.
3746
   *             </p>
3747
   */
3748 23
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    // Memo of results that were already computed.
    static $CHAR_CACHE = [];

    // The cache key is built from the input as it was passed in, before any conversion.
    $original = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // Still not UTF-8 after normalizing the name: convert the character itself.
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred route: let IntlChar do the work; a falsy answer falls through to the manual decoding.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        $CHAR_CACHE[$cacheKey] = $intlCode;

        return $intlCode;
      }
    }

    // Manual decoding: look at (at most) the first four bytes.
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all, which gives 0)
      $code = $lead;
    }

    $CHAR_CACHE[$cacheKey] = $code;

    return $code;
  }
3800
3801
  /**
3802
   * Parses the string into an array (into the second parameter).
3803
   *
3804
   * WARNING: Instead of "parse_str()" this method do not (re-)placing variables in the current scope,
3805
   *          if the second parameter is not set!
3806
   *
3807
   * @link http://php.net/manual/en/function.parse-str.php
3808
   *
3809
   * @param string $str       <p>The input string.</p>
3810
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
3811
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3812
   *
3813
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3814
   */
3815 1
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    // Optionally clean the input before it is parsed.
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    // A parser failure and an empty result are both reported as "false".
    if ($parsed === false) {
      return false;
    }

    return !empty($result);
  }
3826
3827
  /**
3828
   * Checks if \u modifier is available that enables Unicode support in PCRE.
3829
   *
3830
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3831
   */
3832 60
  public static function pcre_utf8_support(): bool
  {
    // Probe: an empty pattern with the "u" modifier, run against an empty subject.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3837
3838
  /**
3839
   * Create an array containing a range of UTF-8 characters.
3840
   *
3841
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3842
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3843
   *
3844
   * @return string[]
3845
   */
3846 1
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // Shared resolver for both boundaries: decimal digits, then hex digits, else a character.
    $toCodePoint = static function ($value): int {
      if (\ctype_digit((string)$value)) {
        return (int)$value;
      }

      if (\ctype_xdigit($value)) {
        return (int)self::hex_to_int($value);
      }

      return self::ord($value);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    // Turn every code point of the numeric range back into its character.
    $chars = [];
    foreach (\range($start, $end) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
3884
3885
  /**
3886
   * Multi decode html entity & fix urlencoded-win1252-chars.
3887
   *
3888
   * e.g:
3889
   * 'test+test'                     => 'test+test'
3890
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3891
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3892
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3893
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3894
   * 'Düsseldorf'                   => 'Düsseldorf'
3895
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3896
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3897
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3898
   *
3899
   * @param string $str          <p>The input string.</p>
3900
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3901
   *
3902
   * @return string
3903
   */
3904 2 View Code Duplication
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten as numeric HTML entities so the entity decoder below handles them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // One pass = to_utf8 -> entity decode -> url decode -> fix_simple_utf8.
    // With $multi_decode the passes repeat until the string no longer changes.
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
      $urlDecoded = \rawurldecode($entitiesDecoded);
      $str = self::fix_simple_utf8($urlDecoded);
    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3933
3934
  /**
3935
   * @param array $strings
3936
   * @param bool  $removeEmptyValues
3937
   * @param int   $removeShortValues
3938
   *
3939
   * @return array
3940
   */
3941 1
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $str) {
      // Length filter: active only when a limit was given; the limit itself is still "too short".
      $tooShort = $removeShortValues !== null && self::strlen($str) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      // Blank filter: drops strings that are empty after trimming.
      $isBlank = $removeEmptyValues === true && \trim($str) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $str;
    }

    // The survivors come back as a freshly indexed list.
    return $kept;
  }
3968
3969
  /**
3970
   * Replaces all occurrences of $pattern in $str by $replacement.
3971
   *
3972
   * @param string $str         <p>The input string.</p>
3973
   * @param string $pattern     <p>The regular expression pattern.</p>
3974
   * @param string $replacement <p>The string to replace with.</p>
3975
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
3976
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
3977
   *
3978
   * @return string
3979
   */
3980 35
  public static function regexReplace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // The option string "msr" is mapped to "ms"; everything else is passed on as given.
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // An empty delimiter falls back to "/".
    $delim = $delimiter ?: '/';

    // The "u" modifier is always set, followed by the caller's modifiers.
    $regex = $delim . $pattern . $delim . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
3999
4000
  /**
4001
   * alias for "UTF8::remove_bom()"
4002
   *
4003
   * @see        UTF8::remove_bom()
4004
   *
4005
   * @param string $str
4006
   *
4007
   * @return string
4008
   *
4009
   * @deprecated <p>use "UTF8::remove_bom()"</p>
4010
   */
4011
  public static function removeBOM(string $str): string
  {
    // Deprecated alias: forwards to remove_bom().
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4015
4016
  /**
4017
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4018
   *
4019
   * @param string $str <p>The input string.</p>
4020
   *
4021
   * @return string <p>String without UTF-BOM</p>
4022
   */
4023 43
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // self::$BOM maps each BOM byte sequence to its length in bytes.
    foreach (self::$BOM as $bomString => $bomByteLength) {
      // Only a BOM sitting at the very start of the string is of interest.
      if (self::strpos($str, $bomString, 0, 'CP850') !== 0) {
        continue;
      }

      // Cut the BOM off; 'CP850' is used so offsets are counted in bytes.
      $withoutBom = self::substr($str, $bomByteLength, null, 'CP850');
      $str = ($withoutBom === false) ? '' : (string)$withoutBom;
    }

    return $str;
  }
4041
4042
  /**
4043
   * Removes duplicate occurrences of a string in another string.
4044
   *
4045
   * @param string          $str  <p>The base string.</p>
4046
   * @param string|string[] $what <p>String to search for in the base string.</p>
4047
   *
4048
   * @return string <p>The result string with removed duplicates.</p>
4049
   */
4050 1
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    // A single needle is handled like a one-element list.
    if (\is_string($what) === true) {
      $what = [$what];
    }

    // Anything that is neither a string nor an array leaves $str untouched.
    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Collapse every run of $item into the single captured occurrence ('$1').
        // The raw $item must not be used as the replacement string: sequences such as
        // "$0" or "\1" inside it would be expanded as backreferences by preg_replace().
        $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4065
4066
  /**
4067
   * Remove invisible characters from a string.
4068
   *
4069
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4070
   *
4071
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4072
   *
4073
   * @param string $str
4074
   * @param bool   $url_encoded
4075
   * @param string $replacement
4076
   *
4077
   * @return string
4078
   */
4079 65
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Percent-encoding is case-insensitive ("%0b" and "%0B" are the same byte),
      // so both patterns carry the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced any more: removing one sequence can
    // expose a new one (e.g. "%%0000" turns into "%00" after the first pass).
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4099
4100
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character, an empty string removes the chars.</p>
   * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars (via "\mb_convert_encoding()").</p>
   *
   * @return string <p>The string without diamond question marks.</p>
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // "none" tells mbstring to drop invalid sequences instead of substituting them
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // temporarily switch the mbstring substitute character and restore it afterwards
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);

      $str = \is_string($converted) ? $converted : '';
    }

    // finally replace the replacement character itself (escaped and literal form)
    return str_replace(["\xEF\xBF\xBD", '�'], $replacementChar, $str);
  }
4149
4150
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (control / other) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but it is a perfectly valid char list,
    // so it must not fall back to the default pattern.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '[\pZ\pC]+\z';
    } else {
      $chars = \preg_quote($chars, '/');
      // "\z" anchors at the very end of the subject; "$" would also match
      // in front of a trailing newline and strip chars that are not at the end
      $pattern = '[' . $chars . ']+\z';
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
4174
4175
  /**
   * Build a regular-expression fragment that matches any of the characters of $s.
   *
   * The result is cached per "$s . $class" combination.
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class <p>An initial character-class content that is extended with the characters of $s.</p>
   *
   * @return string <p>A bracket expression, or a "(?:...|...)" group if there is more than one alternative.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $compiledClasses = [];

    $lookupKey = $s . $class;
    if (isset($compiledClasses[$lookupKey])) {
      return $compiledClasses[$lookupKey];
    }

    // slot 0 collects the content of the bracket expression,
    // every further slot is a separate alternative
    $parts = [$class];

    foreach (self::str_split($s) as $chr) {
      if ($chr === '-') {
        // a hyphen goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($chr[2])) {
        // less than three bytes: quote it for usage inside the pattern
        $parts[0] .= \preg_quote($chr, '/');
        continue;
      }

      if (self::strlen($chr) === 1) {
        // a single character of three or more bytes is appended as it is
        $parts[0] .= $chr;
        continue;
      }

      $parts[] = $chr;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $compiled = (\count($parts) === 1) ? $parts[0] : '(?:' . \implode('|', $parts) . ')';

    $compiledClasses[$lookupKey] = $compiled;

    return $compiled;
  }
4223
4224
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes one "key - value" line per entry of the internal support-array, wrapped in a "<pre>"-tag.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $lines = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $lines .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $lines . '</pre>';
  }
4239
4240
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched, if the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // the two well-known names are used as they are, everything else gets normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return '&#' . self::ord($char, $encoding) . ';';
  }
4269
4270
  /**
   * Replace every run of $tabLength spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength <p>Number of spaces that form one tab; values below 1 leave the string unchanged.</p>
   *
   * @return string <p>The string with the space-runs replaced by tabs.</p>
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    // "\str_repeat()" rejects a negative multiplier and a zero-length
    // search-string can not match anything -> nothing to do
    if ($tabLength < 1) {
      return $str;
    }

    return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
  }
4280
4281
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // one match per character ("s" lets the dot also match newlines)
      $ret = [];
      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: decode the UTF-8 sequences by hand
      $ret = self::splitIntoCharsBytewise($str);

    }

    if ($length > 1) {
      // glue $length characters together per array element
      return \array_map(
          function ($item) {
            return \implode('', $item);
          },
          \array_chunk($ret, $length)
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }

  /**
   * Split a string into UTF-8 characters by inspecting the lead- and continuation-bytes.
   *
   * Used by "UTF8::split()" when PCRE has no UTF-8 support. A lead byte that is not
   * followed by the expected continuation bytes is skipped without adding an element.
   *
   * @param string $str <p>The (non-empty) input string.</p>
   *
   * @return string[] <p>One array element per decoded character.</p>
   */
  private static function splitIntoCharsBytewise(string $str): array
  {
    $ret = [];
    $len = self::strlen_in_byte($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {

      if (($str[$i] & "\x80") === "\x00") {

        // 0xxxxxxx -> single byte (ASCII)
        $ret[] = $str[$i];

      } elseif (
          isset($str[$i + 1])
          &&
          ($str[$i] & "\xE0") === "\xC0"
      ) {

        // 110xxxxx -> two byte sequence
        if (($str[$i + 1] & "\xC0") === "\x80") {
          $ret[] = $str[$i] . $str[$i + 1];

          $i++;
        }

      } elseif (
          isset($str[$i + 2])
          &&
          ($str[$i] & "\xF0") === "\xE0"
      ) {

        // 1110xxxx -> three byte sequence
        if (
            ($str[$i + 1] & "\xC0") === "\x80"
            &&
            ($str[$i + 2] & "\xC0") === "\x80"
        ) {
          $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

          $i += 2;
        }

      } elseif (
          isset($str[$i + 3])
          &&
          ($str[$i] & "\xF8") === "\xF0"
      ) {

        // 11110xxx -> four byte sequence
        if (
            ($str[$i + 1] & "\xC0") === "\x80"
            &&
            ($str[$i + 2] & "\xC0") === "\x80"
            &&
            ($str[$i + 3] & "\xC0") === "\x80"
        ) {
          $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

          $i += 3;
        }

      }
    }

    return $ret;
  }
4398
4399
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains $needle.</p>
   */
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // pick the position-lookup that matches the requested case handling
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4419
4420
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty $haystack or an empty $needles-array always results in false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains every needle.</p>
   */
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needles === []) {
      return false;
    }

    foreach ($needles as $needle) {
      // the first missing needle decides the result
      if (self::str_contains($haystack, $needle, $caseSensitive, $encoding) === false) {
        return false;
      }
    }

    return true;
  }
4450
4451
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty $needles-array always results in false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains at least one needle.</p>
   */
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($needles === []) {
      return false;
    }

    foreach ($needles as $needle) {
      // the first hit decides the result
      if (self::str_contains($haystack, $needle, $caseSensitive, $encoding) === true) {
        return true;
      }
    }

    return false;
  }
4477
4478
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally a round-trip via "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // 1 => little-endian, 2 => big-endian (every check is done only once)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the loaded data is not guaranteed to be a list of encodings
    if (!\is_array(self::$ENCODINGS)) {
      return false;
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure, which must not be handed to a
      // string-function in strict mode; an unchanged round-trip means the
      // string is valid in this encoding
      if ($converted === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
4596
4597
  /**
   * Check if the string ends with the given substring.
   *
   * An empty $haystack or an empty $needle always results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Whether or not $haystack ends with $needle.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // compare the last bytes of the haystack (as many as the needle has) with the needle
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
4613
4614
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool     <p>Whether or not $str ends with one of the $substrings.</p>
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_ends_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
4638
4639
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string <p>The string, beginning with $substring.</p>
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring) ? $str : $substring . $str;
  }
4656
4657
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string <p>The string, ending with $substring.</p>
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring) ? $str : $str . $substring;
  }
4673
4674
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Whether or not $haystack ends with $needle.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the last bytes of the haystack (as many as the needle has) are compared case-insensitively
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4694
4695
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool     <p>Whether or not $str ends with one of the $substrings.</p>
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_iends_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
4719
4720
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * If $index is greater than the length of the string, the string is returned unchanged.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The string with the inserted $substring.</p>
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    if ($index > $len) {
      return $str;
    }

    // head + inserted text + tail
    return self::substr($str, 0, $index, $encoding)
           . $substring
           . self::substr($str, $index, $len, $encoding);
  }
4743
4744
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), passed on to "\preg_replace()".
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every search term into a case-insensitive unicode pattern
    $patterns = \array_map(
        function ($term) {
          $term .= '';

          if ($term === '') {
            // an empty term must never match: this pattern is unsatisfiable
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($term, '/') . '/ui';
        },
        (array)$search
    );

    return \preg_replace($patterns, $replace, $subject, -1, $count);
  }
4787
4788
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always results in false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Whether or not $haystack starts with $needle.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at position 0 means that the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
4808
4809
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the $substrings.</p>
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_istarts_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
4837
4838
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters to keep.</p>
   * @param string $strAddOn <p>The string that is appended, if the input was shortened.</p>
   *
   * @return string <p>The (maybe shortened) string.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if ($str === '') {
      return '';
    }

    // short enough, nothing to cut
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a word: cut the space and we are done
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise drop the (probably incomplete) last word of the truncated string
    $truncated = (string)self::substr($str, 0, $length);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // there was only one word: fall back to a hard cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4874
4875
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>
   *                           String to use for padding the input string;
   *                           with an empty pad-string the input is returned unchanged.
   *                           </p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = self::strlen($str);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // nothing to pad with -> avoid the division by zero below
      if (!$ps_length) {
        return $str;
      }

      // number of characters that are missing
      $diff = ($pad_length - $str_length);

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the smaller half, the right side the bigger one
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2));
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4929
4930
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
4955
  /**
4956
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
4957
   *
4958
   * Replace all occurrences of the search string with the replacement string
4959
   *
4960
   * @link http://php.net/manual/en/function.str-replace.php
4961
   *
4962
   * @param mixed $search  <p>
4963
   *                       The value being searched for, otherwise known as the needle.
4964
   *                       An array may be used to designate multiple needles.
4965
   *                       </p>
4966
   * @param mixed $replace <p>
4967
   *                       The replacement value that replaces found search
4968
   *                       values. An array may be used to designate multiple replacements.
4969
   *                       </p>
4970
   * @param mixed $subject <p>
4971
   *                       The string or array being searched and replaced on,
4972
   *                       otherwise known as the haystack.
4973
   *                       </p>
4974
   *                       <p>
4975
   *                       If subject is an array, then the search and
4976
   *                       replace is performed with every entry of
4977
   *                       subject, and the return value is an array as
4978
   *                       well.
4979
   *                       </p>
4980
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4981
   *
4982
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4983
   */
4984 12
  public static function str_replace($search, $replace, $subject, int &$count = null)
4985
  {
4986 12
    return \str_replace($search, $replace, $subject, $count);
4987
  }
4988
4989
  /**
   * Replace only the first occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first match.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the first match replaced, or the unchanged subject if nothing matched.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstMatch = self::strpos($subject, $search);

    // nothing found -> hand the subject back untouched
    if ($firstMatch === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstMatch, $searchLength);
  }
5008
5009
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string; an empty input yields an empty string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    // Guard the empty string: "range(0, -1)" would produce [0, -1] instead of an empty list.
    if (!isset($str[0])) {
      return '';
    }

    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // rebuild the string by picking one character per shuffled position
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= (string)self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
5031
5032
  /**
   * Sort the characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, the characters are sorted in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $flipped = \array_flip($codePoints);
      $codePoints = \array_flip($flipped);
    }

    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
  }
5057
5058
  /**
   * Split a string into an array of chunks, each holding "$len" grapheme clusters.
   *
   * If an array is passed, every entry is split on its own and the array of results is returned.
   *
   * @param string|string[] $str <p>The input string (or an array of input strings).</p>
   * @param int             $len <p>Number of grapheme clusters per chunk; values below 1 yield an empty array.</p>
   *
   * @return string[] <p>The chunks; an empty array for an empty string or a chunk length below 1.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    // A chunk length below 1 cannot produce chunks. Delegating to the native "str_split()"
    // here would hand back a non-array (or throw) and break the declared return type.
    if ($len < 1) {
      return [];
    }

    if (!isset($str[0])) {
      return [];
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0] ?? [];

    if ($len === 1) {
      return $clusters;
    }

    // glue every run of "$len" clusters together into one chunk
    $chunks = [];
    foreach (\array_chunk($clusters, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
5106
5107
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if "$haystack" begins with "$needle", <strong>false</strong> otherwise.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    $bothFilled = isset($haystack[0], $needle[0]);

    return $bothFilled && \strpos($haystack, $needle) === 0;
  }
5127
5128
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the $substrings.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // an empty input string or an empty candidate list can never match
    if (!isset($str[0]) || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $prefix) {
      if (self::str_starts_with($str, $prefix)) {
        return true;
      }
    }

    return false;
  }
5156
5157
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in base 2,
   * without leading zeros (e.g. "a" => "1100001"). An empty string yields "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of the characters "0" and "1".</p>
   */
  public static function str_to_binary(string $str): string
  {
    if ($str === '') {
      return '0';
    }

    // Convert byte by byte: pushing the whole hex dump through "base_convert()" silently
    // loses precision as soon as the number no longer fits into PHP's native numeric types.
    $bytes = \unpack('C*', $str);
    if ($bytes === false) {
      return '0';
    }

    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= \sprintf('%08b', $byte);
    }

    // same shape as a plain number conversion: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
5170
5171
  /**
   * Split a string into its lines.
   *
   * A run of one or two consecutive "\r" / "\n" characters counts as a single line break,
   * so "\r\n" is one separator (and so is "\n\n").
   *
   * @param string   $str               <p>The input string.</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[] <p>The lines; for an empty input [''] or, with $removeEmptyValues, [].</p>
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // The pattern is pure ASCII, so it is applied byte-wise: with the "u" modifier
    // "preg_split()" returns false for malformed UTF-8, which would break the array return type.
    $lines = \preg_split("/[\r\n]{1,2}/", $str);
    if ($lines === false) {
      // defensive fallback for any other PCRE failure: treat the input as one single line
      $lines = [$str];
    }

    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
5206
5207
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the word pattern as a captured delimiter, so the result
   * contains the words as well as the text between them.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // without any filter option the raw split result is the answer
    $needsReduction = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$needsReduction) {
      return $parts;
    }

    return self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);
  }
5247
5248
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Forwarded unchanged to "UTF8::to_ascii()".</p>
   * @param bool   $strict  <p>Forwarded unchanged to "UTF8::to_ascii()".</p>
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5263
5264
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // "str_to_words()" alternates separator / word, so the words sit at the odd indexes
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      $words = [];
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      $wordsByOffset = [];
      // the first word starts right after the leading separator
      $position = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $wordsByOffset[$position] = $parts[$idx];
        $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $wordsByOffset;
    }

    return ($partCount - 1) / 2;
  }
5307
5308
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are case-folded first.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5326
5327
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5344
5345
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are normalized to NFD
   * and compared byte-wise.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalization yields a non-string for input it cannot handle (e.g. invalid UTF-8);
    // compare the raw string in that case, because "\strcmp()" only accepts strings here.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
5365
5366
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start position of the part of "$str" that is examined.</p>
   * @param int    $length   <p>Length of the part of "$str" that is examined.</p>
   *
   * @return int|null <p>
   *                  Number of characters before the first character from "$charList";<br>
   *                  <strong>null</strong> for an empty "$charList", an empty (sub-)string or a failed substr.
   *                  </p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // narrow the input down to the requested window first
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    if (!isset($str[0])) {
      return null;
    }

    // lazily capture everything in front of the first character of the mask
    $found = \preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match);
    if ($found) {
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
5400
5401
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5418
5419
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    // turn every code point into its character ...
    $chars = \array_map([self::class, 'chr'], $array);

    // ... and glue the characters together
    return \implode('', $chars);
  }
5441
5442
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    // the known BOM byte sequences are the keys of the lookup table
    foreach (\array_keys(self::$BOM) as $bomString) {
      if (\strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5459
5460
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>The input string.</p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
5492
5493
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without any whitespace.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // the cast turns a failed replacement (null) into an empty string
    return (string)$stripped;
  }
5510
5511
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the reported position
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5559
5560
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is case-insensitive. The implementation prefers "mb_stristr()", then
   * "grapheme_stristr()" (UTF-8 only), then the native "stristr()" for pure ASCII input and
   * finally a regex based fallback.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" check would also treat the valid needle "0" as empty.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // regex fallback: lazily capture everything in front of the first (case-insensitive) match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
5635
5636
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "ASCII" and "CP850" are both measured as 8-bit strings
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    $isUtf8 = $encoding === 'UTF-8';

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        // a non UTF-8 encoding without mbstring: "iconv" is the first choice
        $iconvLength = \iconv_strlen($str, $encoding);
        if ($iconvLength !== false) {
          return $iconvLength;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $mbLength = \mb_strlen($str, $encoding);
      if ($mbLength !== false) {
        return $mbLength;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $iconvLength = \iconv_strlen($str, $encoding);
      if ($iconvLength !== false) {
        return $iconvLength;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php
    \preg_match_all('/./us', $str, $chars);
    $charCount = \count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5741
5742
  /**
   * Get string length in byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The number of bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    // with mbstring function overloading the plain "strlen()" is not used;
    // the length is taken via an 8-bit encoding instead
    $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overloaded ? \mb_strlen($str, 'CP850') : \strlen($str);
  }
5759
5760
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded first.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5776
5777
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5795
5796
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5813
5814
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // cut both strings down to their first "$len" characters, then compare those
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5834
5835
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    $matched = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$matched) {
      return false;
    }

    // return the rest of the haystack, beginning at the matched character
    $start = \strpos($haystack, $m[0]);

    return \substr($haystack, $start);
  }
5857
5858
  /**
   * Find position of first occurrence of string in a string.
   *
   * The lookup is delegated to the first available backend, in this order:
   * plain "\strpos" (CP850 only), "\iconv_strpos" (non UTF-8 without mbstring),
   * "\mb_strpos", "\grapheme_strpos", "\iconv_strpos" and finally a vanilla PHP fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
   *                          A negative offset counts from the end of the haystack.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: $needle is declared as "string", so the former "(int)$needle === $needle"
    // code point conversion could never be true and was removed as dead code.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // Translate "count from the end" into an absolute character offset, so that the
      // returned position refers to the original haystack and not to the slice below
      // (e.g. strpos('äbcäbc', 'c', -2) must be 5, not 1). An offset that reaches
      // before the start of the string searches the whole haystack, as before.
      $offset = \max(0, (int)self::strlen($haystack) + $offset);
    }

    if ($offset > 0) {
      // skip the first $offset characters (bytes are characters for pure ASCII)
      $haystackTmp = $haystackIsAscii ? \substr($haystack, $offset) : self::substr($haystack, $offset);
      $haystack = ($haystackTmp === false) ? '' : (string)$haystackTmp;
    }

    // byte position inside the slice ...
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position inside the original haystack.
    // INFO: this sum is always an integer, so the former trailing "\mb_strpos" polyfill
    // call was unreachable and has been dropped.
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos));
  }
5997
5998
  /**
   * Returns the part of the haystack around the last occurrence of the needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return the haystack from its beginning
   *                              up to the last occurrence of needle.<br>
   *                              <b>false</b>: return the haystack from the last occurrence
   *                              of needle to its end.
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // the two well-known encodings are passed through untouched
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences before the needle would make
      // the multi-byte functions report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_"-function, provided by the extension or by a polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
6034
6035
  /**
   * Reverses characters order in the string.
   *
   * Valid UTF-8 input is split into code points in a single pass and joined in
   * reverse order. Input that PCRE rejects (e.g. invalid UTF-8) is still handled
   * by the slower character-by-character loop.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // fast path: one linear scan instead of one self::substr() call per character;
    // "preg_match_all" returns false for invalid UTF-8, which falls through to the loop
    if (\preg_match_all('/./us', $str, $chars) > 0) {
      return \implode('', \array_reverse($chars[0]));
    }

    // fallback: walk from the last character to the first
    $reversed = '';
    $i = self::strlen($str);
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
6056
6057
  /**
   * Case-insensitive variant of strrchr(): returns the part of the haystack
   * around the last occurrence of the needle.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string to search in.</p>
   * @param string $needle         <p>The string to search for.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return the haystack from its beginning
   *                               up to the last occurrence of needle.<br>
   *                               <b>false</b>: return the haystack from the last occurrence
   *                               of needle to its end.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // the two well-known encodings are passed through untouched
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences before the needle would make
      // the multi-byte functions report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
6092
6093
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Uses "\mb_strripos" when mbstring is available, "\grapheme_strripos" for UTF-8
   * when only intl is available, and otherwise compares upper-cased copies via
   * self::strrpos().
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so the former "(int)$needle === $needle"
    // code point conversion could never be true and was removed as dead code.

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare upper-cased copies case-sensitively

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
6155
6156
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is treated as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative integer needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // "\mb_strrpos" and "\grapheme_strrpos" declare an integer offset; with strict types
    // the default null must not reach them (TypeError on PHP 8), so normalize it here
    $offset = $offset ?? 0;

    // INFO: the former "$encoding === true" fallback was dropped,
    // $encoding is declared as "string" and can never be a boolean
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters, the result is shifted back below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut the last |$offset| characters, positions stay relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position inside the (possibly sliced) haystack ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position inside the original haystack
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
6246
6247
  /**
   * Returns the length (in characters) of the leading part of a string
   * that is made up only of characters from the given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start position of the inspected part.</p>
   * @param int    $length [optional] <p>Length of the inspected part.</p>
   *
   * @return int
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // restrict the inspection to the requested slice first
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the longest run of mask characters at the very start of the string
    $matched = \preg_match('/^' . self::rxClass($mask) . '+/u', $str, $span);
    if (!$matched) {
      return 0;
    }

    return self::strlen($span[0]);
  }
6274
6275
  /**
   * Returns the part of the haystack starting at (or, optionally, ending before)
   * the first occurrence of the needle.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences before the needle would make
      // the multi-byte functions report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only mbstring knows how to deal with other encodings
    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // vanilla php: lazily capture everything in front of the first needle
    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
    \preg_match($pattern, $haystack, $match);

    $beforeNeedle = $match[1] ?? null;
    if ($beforeNeedle === null) {
      return false;
    }

    return $before_needle
        ? $beforeNeedle
        : self::substr($haystack, self::strlen($beforeNeedle));
  }
6342
6343
  /**
   * Unicode transformation for case-less matching.
   *
   * The common case folding table is always applied; the full table is applied
   * on top of it on request. The result is lower-cased at the end.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    // per-request caches of the replacement tables
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the full table is loaded lazily, only when it is requested for the first time
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6389
6390
  /**
   * Make a string lowercase.
   *
   * Without a language the conversion is done by "\mb_strtolower". With a language,
   * the intl transliterator "<lang>-Lower" is used (or "Any-Lower" if that id is not
   * available); without intl a warning is raised and "\mb_strtolower" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the multi-byte functions are not tolerant to invalid byte sequences
      $str = self::clean($str);
    }

    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // language specific transliterator, with the generic one as replacement
    $langCode = $lang . '-Lower';
    $isKnownId = \in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true);
    if (!$isKnownId) {
      \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
6443
6444
  /**
   * Generic case sensitive transformation for collation matching:
   * decomposes the string (NFD) and strips all non-spacing marks.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    // split characters into base character + combining marks ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... and drop the marks
    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6456
6457
  /**
   * Make a string uppercase.
   *
   * Without a language the conversion is done by "\mb_strtoupper". With a language,
   * the intl transliterator "<lang>-Upper" is used (or "Any-Upper" if that id is not
   * available); without intl a warning is raised and "\mb_strtoupper" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the multi-byte functions are not tolerant to invalid byte sequences
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // language specific transliterator, with the generic one as replacement
        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method (it used to say "strtolower()", a copy-and-paste slip)
      \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6509
6510
  /**
   * Translate characters or replace sub-strings.
   *
   * With two arguments, an array is used as "search => replacement" map, while a
   * string is simply removed from the input. With three arguments, the characters
   * of $from are mapped one by one onto the characters of $to; the longer of both
   * lists is cut down to the length of the shorter one.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                A copy of str in which all occurrences of each character in from
   *                are translated to the corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // translating something into itself changes nothing
    if ($from === $to) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only as many pairs as both lists can provide
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
6555
6556
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // the two well-known encodings are passed through untouched
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // "mb_"-function, provided by the extension or by a polyfill
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
6580
6581
  /**
6582
   * Get part of a string.
6583
   *
6584
   * @link http://php.net/manual/en/function.mb-substr.php
6585
   *
6586
   * @param string $str       <p>The string being checked.</p>
6587
   * @param int    $offset    <p>The first position used in str.</p>
6588
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
6589
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6590
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6591
   *
6592
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6593
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6594
   *                      characters long, <b>FALSE</b> will be returned.</p>
6595
   */
6596 75
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // nothing can be cut out of an empty string
    if (!isset($str[0])) {
      return '';
    }

    // an explicit length of zero always yields an empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // whole string requested
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the character count is only needed to validate a non-zero offset
    // or to replace an open-ended (NULL) length
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // the offset points directly behind the last character and no length was given
    // ("$length" can only be NULL or non-zero here, zero was handled above)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // impossible: the offset lies behind the end of the string
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    // from here on "$length" is never NULL
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is handled by the byte-based native function, as long as mbstring does not overload it
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // pure ASCII: one byte per character, so the native function is safe
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
6700
6701
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * <p>If an offset or a length is given, $str1 is first reduced to the requested part and $str2 is
   * shortened to the same number of characters, then both parts are compared.</p>
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $needsCut = ($offset !== 0 || $length !== null);

    if ($needsCut) {
      // cut the requested part out of the main string ("false" is treated as an empty string)
      $firstPart = self::substr($str1, $offset, $length);
      $str1 = ($firstPart === false) ? '' : (string)$firstPart;

      // shorten the second string to the length of the part we compare against
      $secondPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($secondPart === false) ? '' : (string)$secondPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6745
6746
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. If omitted, the length of the haystack (measured in the
   *                             given encoding) is used.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty
   *                   (and, on PHP &lt; 7.1, if offset and length are both non-zero and do not add up
   *                   to a positive value).</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the same encoding that "self::substr()" uses below
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // only search in the requested part of the haystack ("false" is treated as an empty string)
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the matches of the literally quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
6828
6829
  /**
   * Returns the number of occurrences of $substring in the given string.
   * By default, the comparison is case-sensitive, but can be made insensitive
   * by setting $caseSensitive to false (both strings are upper-cased before counting).
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>The number of occurrences; a "false" result of "UTF8::substr_count()" is cast to 0.</p>
   */
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($caseSensitive) {
      return (int)self::substr_count($str, $substring, 0, null, $encoding);
    }

    // case-insensitive: bring both strings to the same case first
    $upperStr = self::strtoupper($str, $encoding);
    $upperSubstring = self::strtoupper($substring, $encoding);

    return (int)self::substr_count($upperStr, $upperSubstring, 0, null, $encoding);
  }
6850
6851
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start
   *                with the needle.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '') {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many characters as the needle is long ("false" is treated as an empty string)
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6879
6880
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end
   *                with the needle.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '') {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix ("false" is treated as an empty string)
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6908
6909
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start
   *                with the needle.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '') {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many characters as the needle is long ("false" is treated as an empty string)
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6937
6938
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * <p>If $str is an array, the method is applied to every element and a list of results is returned
   * (the keys of the input array are not preserved). An empty input array yields an empty array.</p>
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a string input, then it will
   *                                          default to strlen( string ); i.e. end the replacing at the end of string.
   *                                          Of course, if length is zero then this function will have the effect of
   *                                          inserting replacement into string at the given start offset.
   *                                          NOTE: for an array input without a length, a length of 0 is used for
   *                                          every element.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // nothing to do: "\array_pad()" below always returns at least one element,
      // which would make "\array_map()" call us once for a non-existing string
      if ($num === 0) {
        return [];
      }

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset (non-integer values are replaced by 0)
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = \is_int($valueTmp) ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length (NULL values are replaced by 0, other non-integer values by the number of strings)
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = \is_int($valueTmpV2) ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, one per input string
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string can only take a single replacement: use the first one
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // pure ASCII: one byte per character, so the native function is safe
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the replacement into the input
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
7039
7040
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end
   *                with the needle.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '') {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix ("false" is treated as an empty string)
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
7068
7069
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: where the lower- and upper-cased bytes are equal they cancel each other out
    // and the input byte remains, otherwise the input byte is turned into its counterpart
    return (string)($lower ^ $upper ^ $str);
  }
7096
7097
  /**
   * Replaces every tab character with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
7107
7108
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>String with all characters of $str being title-cased.</p>
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "mb_convert_case()" used a polyfill from the "UTF8"-Class
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
7122
7123
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Passed on as the "unknown character" replacement.</p>
   * @param bool   $strict    [optional] <p>Passed on as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
7140
7141
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7156
7157
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
7172
7173
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7188
7189
  /**
   * Convert a string into ASCII.
   *
   * <p>Pure ASCII input is returned unchanged. Everything else is cleaned via "UTF8::clean()" and then
   * converted character by character with the help of lookup tables that are loaded on demand.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // lookup tables, indexed by the upper bits of the code point ("bank"), loaded lazily
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the input string in that case
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset the code point for every character, otherwise the value of the previous
      // character would be re-used if the lead byte matches none of the ranges below
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2 bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3 bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4 bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5 bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6 bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 254 and 255 are never valid lead bytes
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be calculated for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the lookup tables are split into "banks" of 256 code points each
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the code point within its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no transliteration known for this character
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return \implode('', $chars);
  }
7348
7349
  /**
   * Converts a string into a boolean.
   *
   * <p>The keywords "true", "1", "on", "yes" and "false", "0", "off", "no" are matched case-insensitively,
   * other numeric strings are true if they are greater than zero, and everything else is true
   * if the trimmed string casts to true.</p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function to_boolean(string $str): bool
  {
    $key = \strtolower($str);

    // Info: http://php.net/manual/en/filter.filters.validate.php
    if (\in_array($key, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($key, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    if (\is_numeric($str)) {
      return ($str + 0) > 0;
    }

    return (bool)self::trim($str);
  }
7380
7381
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // "\array_map()" with a single array preserves the keys
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    return ($str === '') ? '' : self::utf8_decode($str);
  }
7405
7406
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
7419
7420
  /**
   * Leave byte sequences that look like UTF-8 untouched and convert almost every other byte to UTF-8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, keys are kept
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      if (!($lead >= "\xC0")) {
        // below 0xC0: a stray continuation byte (10xxxxxx) needs conversion, everything else is kept
        $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert($lead) : $lead;

        continue;
      }

      // number of continuation bytes the lead byte announces (0 = cannot start a UTF-8 sequence)
      if ($lead <= "\xDF") {
        $tailLength = 1;
      } elseif ($lead <= "\xEF") {
        $tailLength = 2;
      } elseif ($lead <= "\xF7") {
        $tailLength = 3;
      } else {
        $tailLength = 0;
      }

      $sequence = $lead;
      $looksLikeUtf8 = $tailLength > 0;

      for ($k = 1; $looksLikeUtf8 && $k <= $tailLength; ++$k) {
        // a missing byte at the end of the string counts as "\x00", which is never a continuation byte
        $tail = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

        if ($tail >= "\x80" && $tail <= "\xBF") {
          $sequence .= $tail;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its tail
        $result .= $sequence;
        $pos += $tailLength;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are inspected on their own
        $result .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
7528
7529
  /**
   * Convert a single non-UTF-8 byte into its UTF-8 representation.
   *
   * The byte is first looked up in the "win1252_to_utf8" data table; if there is no entry,
   * a 2-byte UTF-8 sequence is built from the byte's bits.
   *
   * @param string $int <p>
   *                    A single byte. Despite the historical parameter name this is a one-char string
   *                    (it is used as a key of the "ord" table and in byte-wise "&" operations), not an integer.
   *                    </p>
   *
   * @return string <p>The UTF-8 byte sequence for the given byte.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Use an integer shift instead of "$ordC1 / 64": the division yields a float, and using a
    // float with a fractional part as array offset is deprecated since PHP 8.1.
    // NOTE(review): assumes the "ord" table holds non-negative integers - confirm against the data file.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
7562
7563
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>
   *                      Optional characters to be stripped. If omitted (INF) or empty, everything matching
   *                      the unicode categories "Z" (separators) and "C" (other / control) is stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid list of characters to strip,
    // so it must not fall back to the default pattern.
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
7592
7593
  /**
   * Makes string's first char uppercase.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // remove invalid characters first, so that the character offsets below are reliable
      $str = self::clean($str);
    }

    // everything after the first character; "false" from substr() is treated as "nothing left"
    $rest = self::substr($str, 1, null, $encoding);

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . ($rest === false ? '' : $rest);
  }
7623
7624
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7639
7640
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // test the first byte instead of the truthiness, otherwise the string "0" would be treated as empty
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the native function is only used, if neither a charlist nor any (non-empty) exception was given;
    // compared against '' so that a charlist of "0" is not ignored
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty chunks only; a chunk like "0" is real content and must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
7708
7709
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, until the string does not change anymore.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, so that they get decoded below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // one pass is always done; further passes only if requested and if the last pass changed something
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return $str;
  }
7757
7758
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-encoded bytes "%20" to "%FF", the values are the corresponding characters.
   * Some entries map to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[]
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the euro sign in Windows-1252 (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
7994
7995
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * 2-byte sequences that decode to a value below 256 become one byte; every other multi-byte
   * sequence (and a 2-byte sequence that is cut off at the end of the string) becomes "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the string is returned without the byte-wise decoding whenever
   *                              the decoded result is not shorter (per "UTF8::strlen()") than the input.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): this replaces the table's keys by its values, although the cache names suggest
    // the UTF-8 -> WIN1252 direction - verify against the "win1252_to_utf8" data file.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // The string is rewritten in place: $i is the read offset, $j the write offset ($j never overtakes $i).
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if ($i + 1 >= $len) {
            // the continuation byte is missing (truncated input): do not read past the end of the string
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one byte more than for 3-byte sequences (intended fall-through)
          ++$i;
        case "\xE0":
          // 3-byte (and 4-byte) sequences are replaced by the placeholder
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // cut off the bytes behind the write offset
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
8076
8077
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The string is encoded with the native "utf8_encode()"; if the result contains the byte 0xC2,
   * the keys of the "win1252_to_utf8" table are replaced by their values afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $str = (string)$encoded;
    if (\strpos($str, "\xC2") === false) {
      // nothing that the replacement table could match
      return $str;
    }

    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    // build the search / replace lists only once per request
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    $fixed = \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);

    return $fixed;
  }
8117
8118
  /**
   * Alias of "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
8131
8132
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return string[] <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
8148
8149
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Appended to the result, if the string was shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input if it
   *                has no more than $limit words, otherwise the first $limit words plus $strAddOn.
   *                </p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ($limit < 1 || !isset($str[0])) {
      return '';
    }

    // leading whitespace, followed by up to $limit runs of "non-whitespace + trailing whitespace"
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    $head = $matches[0] ?? null;

    // no match, or the match already covers the whole string: nothing to cut off
    if ($head === null || self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
8180
8181
  /**
   * Wraps a string to a given number of characters
   *
   * The string is split into characters and mirrored by a single-byte mask
   * (" " for a space, "#" for an existing break, "?" for everything else). The native
   * "wordwrap()" runs on that mask, and its break positions are applied to the characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    $mask = '';
    $chars = [];
    $segments = \explode($break, $str);
    $segmentCount = \count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($index = 0; $index < $segmentCount; ++$index) {

      // every segment but the first one was preceded by a break in the input
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$index];
      unset($segments[$index]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0;
    $breakPos = $maskPos = -1;
    $mask = \wordwrap($mask, $width, '#', $cut);

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // copy all characters in front of the break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break replaces an existing break or a space; otherwise the character is kept
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $chars);
  }
8245
8246
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return string[] <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8256
}
8257