Completed
Pull Request — master (#71)
by Lars
13:13 queued 10:58
created

UTF8::str_split()   B

Complexity

Conditions 8
Paths 8

Size

Total Lines 40

Duplication

Lines 7
Ratio 17.5 %

Code Coverage

Tests 19
CRAP Score 8.0079

Importance

Changes 0
Metric Value
dl 7
loc 40
ccs 19
cts 20
cp 0.95
rs 8.0355
c 0
b 0
f 0
cc 8
nc 8
nop 2
crap 8.0079
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Instantiating the helper runs the environment feature detection
   * (see UTF8::checkForSupport()).
   */
  public function __construct()
  {
    self::checkForSupport();
  }
212
213
  /**
   * Fetch a single character by position: "$str[1]"-like access for multi-byte strings.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single multi-byte character, or an empty string if $str is empty
   *                or $pos is negative.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string, and negative positions are rejected
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string that already carries a BOM (per UTF8::string_has_bom()) is returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the BOM from UTF8::bom() when it had none.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Multi-byte aware variant of changing the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value
   *                     is treated as CASE_LOWER.</p>
   *
   * @return array <p>A new array with lower- or uppercased keys and the original values.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // only an explicit CASE_UPPER switches to uppercase, everything else means lowercase
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $oldKey => $item) {
      $newKey = $toUpper ? self::strtoupper($oldKey) : self::strtolower($oldKey);

      // keys that collide after the case change overwrite earlier entries
      $result[$newKey] = $item;
    }

    return $result;
  }
284
285
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The text between the delimiters, or an empty string when either delimiter
   *                is missing or nothing lies between them.</p>
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $posStart = self::strpos($str, $start, $offset, $encoding);
    if ($posStart === false) {
      return '';
    }

    // first character after the start delimiter
    $substrIndex = $posStart + self::strlen($start, $encoding);

    $posEnd = self::strpos($str, $end, $substrIndex, $encoding);
    if (
        $posEnd === false
        ||
        $posEnd === $substrIndex
    ) {
      return '';
    }

    // cast like UTF8::access() does: a non-string result from substr() must not
    // violate the declared string return type under strict_types
    return (string)self::substr($str, $substrIndex, $posEnd - $substrIndex, $encoding);
  }
317
318
  /**
   * Convert a binary ("0"/"1") representation into a string.
   *
   * The bits are decoded byte-wise with plain string operations, so the input
   * length is not limited by float precision. Leading zero bits are ignored and
   * the remaining bits are left-padded to whole bytes; characters other than
   * "0" and "1" are skipped.
   *
   * @param mixed $bin <p>String consisting of 1|0 characters.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty, non-string or all-zero input.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // keep only real bits and drop leading zeros (an all-zero input decodes to '')
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8, so that every chunk is exactly one byte
    $missing = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    $str = '';
    foreach (\str_split($bits, 8) as $octet) {
      $str .= \chr((int)\bindec($octet));
    }

    return $str;
  }
338
339
  /**
   * Get the byte sequence of the UTF-8 Byte Order Mark.
   *
   * INFO: the UTF-16 and UTF-32 BOM values are listed in UTF8::$BOM
   *
   * @return string <p>The three bytes 0xEF 0xBB 0xBF.</p>
   */
  public static function bom(): string
  {
    return "\xef\xbb\xbf";
  }
350
351
  /**
   * Alias of UTF8::chr_map(): the arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback to apply.</p>
   * @param string       $str      <p>The input string.</p>
   *
   * @return string[] <p>Whatever UTF8::chr_map() returns for the same arguments.</p>
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
365
366
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $index <p>Position of the character.</p>
   *
   * @return string <p>The character at $index.</p>
   */
  public static function char_at(string $str, int $index): string
  {
    // cast like UTF8::access() does: a non-string result from substr() must not
    // violate the declared string return type under strict_types
    return (string)self::substr($str, $index, 1);
  }
378
379
  /**
   * Break a string up into its individual characters.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] <p>The result of UTF8::str_split() with a chunk length of 1.</p>
   */
  public static function chars(string $str): array
  {
    $chunkLength = 1;

    return self::str_split($str, $chunkLength);
  }
390
391
  /**
   * Detects which UTF-8 related PHP features are available and stores
   * the result in UTF8::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       The detection runs only once; later calls return immediately.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator ids are only listed when "intl" really offers the function
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              && \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? \transliterator_list_ids()
        : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
427
428
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Numeric strings are converted to integers before use, so "228" and 228
   * produce the same character. Results are cached per code point and encoding.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input
   *                     (non-numeric value or a code point outside 0 - 0x10FFFF).</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // empty or non-numeric input is a failure
    if (\is_numeric($code_point) === false) {
      return null;
    }

    // IntlChar::chr() treats a string argument as a character, not as a code point,
    // so numeric strings must be converted to a real integer first
    $code_point = (int)$code_point;

    // outside of the Unicode code space
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point > 0x7F && self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      if ($code_point <= 0x7F) {
        // use "simple"-char only until "\x80"
        $chr = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    // nothing to convert (and nothing worth caching) if no character was produced
    if ($chr === null) {
      return null;
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
519
520
  /**
   * Runs a callback on every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] <p>The callback results, one entry per element returned by UTF8::split().</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
534
535
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] <p>An array of byte lengths of each character, empty for an empty string.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // same lazy initialisation as in UTF8::chr() / UTF8::encode(),
    // the support table is read below
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "\strlen" cannot be trusted to count bytes here, so measure via the 8-bit charset
      return \array_map(
          static function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
566
567
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 - 4); only the first
   * character of $char is looked at. Continuation bytes that are missing from
   * a truncated sequence contribute zero bits.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value, 0 for an empty string.</p>
   */
  public static function chr_to_decimal(string $char): int
  {
    // there is no byte to decode in an empty string
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx (do not read past the end of a truncated sequence)
      $next = isset($char[$i]) ? self::ord($char[$i]) : 0;
      $code = ($code << 6) + ($next & ~0x80);
    }

    return $code;
  }
605
606
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed over to UTF8::int_to_hex(). Default: "U+"</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the NUL entity is looked up like an empty character
    $lookup = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($lookup);

    return self::int_to_hex($codePoint, $pfix);
  }
626
627
  /**
   * Alias of "UTF8::chr_to_decimal()": the argument is passed through unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for $chr.</p>
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
640
641
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * INFO: the chunks are glued together with $end, so no line ending is appended
   *       after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
654
655
  /**
   * Strips every byte that is not part of a well-formed UTF-8 sequence and
   * optionally applies further normalizations.
   *
   * The extra steps run in this order: diamond question mark, invisible characters,
   * whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word
   *                                              chars e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond
   *                                              question mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
   *                                              invisible characters e.g.: "\0"</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of valid sequences, the other two alternatives
    // match single stray bytes and therefore leave group 1 empty
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keeping only group 1 drops every stray byte
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
715
716
  /**
   * Repairs the UTF-8 encoding of a string and reduces it to clean, printable UTF-8.
   *
   * This is UTF8::fix_simple_utf8() followed by UTF8::clean() with a fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, an empty string for empty input.</p>
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (MS Word chars are left as they are)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
749
750
  /**
   * Converts a string (or an array of strings) into a list of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings. A string is
   *                                    split via UTF8::split() first, array elements are used as given.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return int[]|string[] <p>The array of code points: the UTF8::ord() results, run through
   *                        UTF8::int_to_hex() if $u_style is set.</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $toCodePoint = [self::class, 'ord'];
    $codePoints = \array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $codePoints;
    }

    $toHex = [self::class, 'int_to_hex'];

    return \array_map($toHex, $codePoints);
  }
787
788
  /**
   * Replaces every run of whitespace characters (the "[[:space:]]" class) with
   * one single space and trims the result afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String with a trimmed $str and condensed whitespace.</p>
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regexReplace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
803
804
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // one array element per character
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
817
818
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * INFO: this is UTF8::delimit() with "-" as the delimiter.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The dash-separated string.</p>
   */
  public static function dasherize(string $str): string
  {
    $delimiter = '-';

    return self::delimit($str, $delimiter);
  }
831
832
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") which is then
   * decoded via UTF8::html_entity_decode().
   *
   * @param mixed $int <p>The decimal code of the character.</p>
   *
   * @return string <p>The decoded entity.</p>
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';
    $flags = ENT_QUOTES | ENT_HTML5;

    return self::html_entity_decode($entity, $flags);
  }
843
844
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * INFO: "uppercase" means every Unicode uppercase letter (\p{Lu}), not only A-Z.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The delimited string.</p>
   */
  public static function delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every uppercase letter inside of a word with a dash,
    // the dashes are turned into the delimiter by the last step
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // the delimiter is inserted after lowercasing, so it keeps its own case
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
  }
866
867
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise the string is only converted if its detected encoding
   *                         differs from the requested one</p>
   *
   * @return string <p>The converted string, or the input if it was empty, needed no conversion
   *                or the conversion produced an empty result.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if ($str === '' || $encoding === '') {
      return $str;
    }

    $isKnownName = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownName) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // without "force" a string that already has the target encoding stays as it is
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // encodings that are handled by the own converters instead of "mbstring"
    $nativeEncodings = ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'];

    $useOwnConverter = $force === true
                       || \in_array($encodingDetected, $nativeEncodings, true);

    if ($encoding === 'UTF-8' && $useOwnConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useOwnConverter) {
      return self::to_iso8859($str);
    }

    if (
        \in_array($encoding, $nativeEncodings, true) === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // an empty / falsy conversion result falls back to the input
    return $converted ? $converted : $str;
  }
953
954
  /**
955
   * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
956
   *
957
   * @param string   $str                    <p>The input string.</p>
958
   * @param string   $search                 <p>The searched string.</p>
959
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
960
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
961
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
962
   *
963
   * @return string
964
   */
965
  public static function extractText(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
966
  {
967
    // init
968
    $text = $str;
969
970
    if (empty($text)) {
971
      return '';
972
    }
973
974
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
975
976
    if ($length === null) {
977
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
978
    }
979
980
    if (empty($search)) {
981
982
      $stringLength = self::strlen($text, $encoding);
983
984
      if ($length > 0) {
985
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
986
      } else {
987
        $end = 0;
988
      }
989
990
      $pos = \min(
991
          self::strpos($text, ' ', $end, $encoding),
992
          self::strpos($text, '.', $end, $encoding)
993
      );
994
995
      if ($pos) {
996
        return \rtrim(
997
                   self::substr($text, 0, $pos, $encoding),
998
                   $trimChars
999
               ) . $replacerForSkippedText;
1000
      }
1001
1002
      return $text;
1003
    }
1004
1005
    $wordPos = self::stripos(
1006
        $text,
1007
        $search,
1008
        0,
1009
        $encoding
1010
    );
1011
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);
1012
1013
    if ($halfSide > 0) {
1014
1015
      $halfText = self::substr($text, 0, $halfSide, $encoding);
1016
      $pos_start = \max(
1017
          self::strrpos($halfText, ' ', 0, $encoding),
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1015 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1018
          self::strrpos($halfText, '.', 0, $encoding)
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1015 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1019
      );
1020
1021
      if (!$pos_start) {
1022
        $pos_start = 0;
1023
      }
1024
1025
    } else {
1026
      $pos_start = 0;
1027
    }
1028
1029
    if ($wordPos && $halfSide > 0) {
1030
      $l = $pos_start + $length - 1;
1031
      $realLength = self::strlen($text, $encoding);
1032
1033
      if ($l > $realLength) {
1034
        $l = $realLength;
1035
      }
1036
1037
      $pos_end = \min(
1038
                     self::strpos($text, ' ', $l, $encoding),
1039
                     self::strpos($text, '.', $l, $encoding)
1040
                 ) - $pos_start;
1041
1042
      if (!$pos_end || $pos_end <= 0) {
1043
        $extract = $replacerForSkippedText . \ltrim(
1044
                self::substr(
1045
                    $text,
1046
                    $pos_start,
1047
                    self::strlen($text),
1048
                    $encoding
1049
                ),
1050
                $trimChars
1051
            );
1052 View Code Duplication
      } else {
1053
        $extract = $replacerForSkippedText . \trim(
1054
                self::substr(
1055
                    $text,
1056
                    $pos_start,
1057
                    $pos_end,
1058
                    $encoding
1059
                ),
1060
                $trimChars
1061
            ) . $replacerForSkippedText;
1062
      }
1063
1064
    } else {
1065
1066
      $l = $length - 1;
1067
      $trueLength = self::strlen($text, $encoding);
1068
1069
      if ($l > $trueLength) {
1070
        $l = $trueLength;
1071
      }
1072
1073
      $pos_end = \min(
1074
          self::strpos($text, ' ', $l, $encoding),
1075
          self::strpos($text, '.', $l, $encoding)
1076
      );
1077
1078 View Code Duplication
      if ($pos_end) {
1079
        $extract = \rtrim(
1080
                       self::substr($text, 0, $pos_end, $encoding),
1081
                       $trimChars
1082
                   ) . $replacerForSkippedText;
1083
      } else {
1084
        $extract = $text;
1085
      }
1086
    }
1087
1088
    return $extract;
1089
  }
1090
1091
  /**
1092
   * Reads entire file into a string.
1093
   *
1094
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1095
   *
1096
   * @link http://php.net/manual/en/function.file-get-contents.php
1097
   *
1098
   * @param string        $filename         <p>
1099
   *                                        Name of the file to read.
1100
   *                                        </p>
1101
   * @param bool          $use_include_path [optional] <p>
1102
   *                                        Prior to PHP 5, this parameter is called
1103
   *                                        use_include_path and is a bool.
1104
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1105
   *                                        to trigger include path
1106
   *                                        search.
1107
   *                                        </p>
1108
   * @param resource|null $context          [optional] <p>
1109
   *                                        A valid context resource created with
1110
   *                                        stream_context_create. If you don't need to use a
1111
   *                                        custom context, you can skip this parameter by &null;.
1112
   *                                        </p>
1113
   * @param int|null      $offset           [optional] <p>
1114
   *                                        The offset where the reading starts.
1115
   *                                        </p>
1116
   * @param int|null      $maxLength        [optional] <p>
1117
   *                                        Maximum length of data read. The default is to read until end
1118
   *                                        of file is reached.
1119
   *                                        </p>
1120
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1121
   *
1122
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1123
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1124
   *
1125
   * @return string|false <p>The function returns the read data or false on failure.</p>
1126
   */
1127 6
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // Sanitize the given path; filter_var() returns false when the filter fails.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);
    if ($filename === false) {
      // Report the failure the documented way instead of handing a bool
      // to \file_get_contents(), which is a type error under strict_types.
      return false;
    }

    // Only build a context when the caller did not supply one, so the
    // timeout never overrides caller-provided stream options.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed when one was requested, so the
    // native default (read until end of file) applies otherwise.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // Convert only non-binary data, but also data detected as UTF-16 or UTF-32.
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1174
1175
  /**
   * Tells whether the string has a character at the given index.
   * A negative index is counted from the last character of the string.
   *
   * @param int    $offset   <p>The index to check.</p>
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not the index exists.</p>
   */
  public static function offset_exists($offset, string $str, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);
    $index = (int)$offset;

    // Non-negative: the index must be below the length.
    // Negative: its magnitude must not exceed the length.
    return $index >= 0
        ? ($charCount > $index)
        : ($charCount >= \abs($index));
  }
1199
1200
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // \file_get_contents() returns false on failure; an unreadable file
    // cannot be said to start with a BOM, and false is not valid input
    // for the string check below.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1211
1212
  /**
1213
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1214
   *
1215
   * @param mixed  $var
1216
   * @param int    $normalization_form
1217
   * @param string $leading_combining
1218
   *
1219
   * @return mixed
1220
   */
1221 9
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    // Arrays: filter every element recursively.
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if (\is_object($var)) {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Anything that is not a string is handed back untouched.
    if (!\is_string($var)) {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Marker value: already normalized, nothing to replace.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // Normalization produced nothing usable: re-encode the input instead.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1275
1276
  /**
1277
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1278
   *
1279
   * Gets a specific external variable by name and optionally filters it
1280
   *
1281
   * @link  http://php.net/manual/en/function.filter-input.php
1282
   *
1283
   * @param int    $type          <p>
1284
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1285
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1286
   *                              <b>INPUT_ENV</b>.
1287
   *                              </p>
1288
   * @param string $variable_name <p>
1289
   *                              Name of a variable to get.
1290
   *                              </p>
1291
   * @param int    $filter        [optional] <p>
1292
   *                              The ID of the filter to apply. The
1293
   *                              manual page lists the available filters.
1294
   *                              </p>
1295
   * @param mixed  $options       [optional] <p>
1296
   *                              Associative array of options or bitwise disjunction of flags. If filter
1297
   *                              accepts options, flags can be provided in "flags" field of array.
1298
   *                              </p>
1299
   *
1300
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1301
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1302
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1303
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1304
   * @since 5.2.0
1305
   */
1306 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied it, so the
    // native function keeps its own default otherwise.
    $optionsGiven = \func_num_args() >= 4;

    $value = $optionsGiven
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($value);
  }
1316
1317
  /**
1318
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1319
   *
1320
   * Gets external variables and optionally filters them
1321
   *
1322
   * @link  http://php.net/manual/en/function.filter-input-array.php
1323
   *
1324
   * @param int   $type       <p>
1325
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1326
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1327
   *                          <b>INPUT_ENV</b>.
1328
   *                          </p>
1329
   * @param mixed $definition [optional] <p>
1330
   *                          An array defining the arguments. A valid key is a string
1331
   *                          containing a variable name and a valid value is either a filter type, or an array
1332
   *                          optionally specifying the filter, flags and options. If the value is an
1333
   *                          array, valid keys are filter which specifies the
1334
   *                          filter type,
1335
   *                          flags which specifies any flags that apply to the
1336
   *                          filter, and options which specifies any options that
1337
   *                          apply to the filter. See the example below for a better understanding.
1338
   *                          </p>
1339
   *                          <p>
1340
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1341
   *                          input array are filtered by this filter.
1342
   *                          </p>
1343
   * @param bool  $add_empty  [optional] <p>
1344
   *                          Add missing keys as <b>NULL</b> to the return value.
1345
   *                          </p>
1346
   *
1347
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1348
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1349
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1350
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1351
   * fails.
1352
   * @since 5.2.0
1353
   */
1354 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // Called with the type only: let the native function use its own defaults.
    $onlyTypeGiven = \func_num_args() < 2;

    $values = $onlyTypeGiven
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1364
1365
  /**
1366
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1367
   *
1368
   * Filters a variable with a specified filter
1369
   *
1370
   * @link  http://php.net/manual/en/function.filter-var.php
1371
   *
1372
   * @param mixed $variable <p>
1373
   *                        Value to filter.
1374
   *                        </p>
1375
   * @param int   $filter   [optional] <p>
1376
   *                        The ID of the filter to apply. The
1377
   *                        manual page lists the available filters.
1378
   *                        </p>
1379
   * @param mixed $options  [optional] <p>
1380
   *                        Associative array of options or bitwise disjunction of flags. If filter
1381
   *                        accepts options, flags can be provided in "flags" field of array. For
1382
   *                        the "callback" filter, callable type should be passed. The
1383
   *                        callback must accept one argument, the value to be filtered, and return
1384
   *                        the value after filtering/sanitizing it.
1385
   *                        </p>
1386
   *                        <p>
1387
   *                        <code>
1388
   *                        // for filters that accept options, use this format
1389
   *                        $options = array(
1390
   *                        'options' => array(
1391
   *                        'default' => 3, // value to return if the filter fails
1392
   *                        // other options here
1393
   *                        'min_range' => 0
1394
   *                        ),
1395
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1396
   *                        );
1397
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1398
   *                        // for filter that only accept flags, you can pass them directly
1399
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1400
   *                        // for filter that only accept flags, you can also pass as an array
1401
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1402
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1403
   *                        // callback validate filter
1404
   *                        function foo($value)
1405
   *                        {
1406
   *                        // Expected format: Surname, GivenNames
1407
   *                        if (strpos($value, ", ") === false) return false;
1408
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1409
   *                        $empty = (empty($surname) || empty($givennames));
1410
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1411
   *                        if ($empty || $notstrings) {
1412
   *                        return false;
1413
   *                        } else {
1414
   *                        return $value;
1415
   *                        }
1416
   *                        }
1417
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1418
   *                        </code>
1419
   *                        </p>
1420
   *
1421
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1422
   * @since 5.2.0
1423
   */
1424 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied it, so the
    // native function keeps its own default otherwise.
    $optionsGiven = \func_num_args() >= 3;

    $filtered = $optionsGiven
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1434
1435
  /**
1436
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1437
   *
1438
   * Gets multiple variables and optionally filters them
1439
   *
1440
   * @link  http://php.net/manual/en/function.filter-var-array.php
1441
   *
1442
   * @param array $data       <p>
1443
   *                          An array with string keys containing the data to filter.
1444
   *                          </p>
1445
   * @param mixed $definition [optional] <p>
1446
   *                          An array defining the arguments. A valid key is a string
1447
   *                          containing a variable name and a valid value is either a
1448
   *                          filter type, or an
1449
   *                          array optionally specifying the filter, flags and options.
1450
   *                          If the value is an array, valid keys are filter
1451
   *                          which specifies the filter type,
1452
   *                          flags which specifies any flags that apply to the
1453
   *                          filter, and options which specifies any options that
1454
   *                          apply to the filter. See the example below for a better understanding.
1455
   *                          </p>
1456
   *                          <p>
1457
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1458
   *                          input array are filtered by this filter.
1459
   *                          </p>
1460
   * @param bool  $add_empty  [optional] <p>
1461
   *                          Add missing keys as <b>NULL</b> to the return value.
1462
   *                          </p>
1463
   *
1464
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1465
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1466
   * the variable is not set.
1467
   * @since 5.2.0
1468
   */
1469 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // Called with the data only: let the native function use its own defaults.
    $onlyDataGiven = \func_num_args() < 2;

    $values = $onlyDataGiven
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1479
1480
  /**
1481
   * Returns the first $n characters of the string.
1482
   *
1483
   * @param string $str      <p>The input string.</p>
1484
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1485
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
1486
   *
1487
   * @return string
1488
   */
1489
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
1490
  {
1491
    if ($n <= 0) {
1492
      return '';
1493
    }
1494
1495
    return self::substr($str, 0, $n, $encoding);
1496
  }
1497
1498
  /**
1499
   * Check if the number of unicode characters are not more than the specified integer.
1500
   *
1501
   * @param string $str      The original string to be checked.
1502
   * @param int    $box_size The size in number of chars to be checked against string.
1503
   *
1504
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1505
   */
1506 1
  public static function fits_inside(string $str, int $box_size): bool
1507
  {
1508 1
    return (self::strlen($str) <= $box_size);
1509
  }
1510
1511
  /**
1512
   * Try to fix simple broken UTF-8 strings.
1513
   *
1514
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1515
   *
1516
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1517
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1518
   * See: http://en.wikipedia.org/wiki/Windows-1252
1519
   *
1520
   * @param string $str <p>The input string</p>
1521
   *
1522
   * @return string
1523
   */
1524 29 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    // Nothing to repair in an empty string.
    if ($str === '') {
      return '';
    }

    // Search/replace lists, built once and reused across calls.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // Load the replacement table lazily on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1545
1546
  /**
1547
   * Fix a double (or multiple) encoded UTF8 string.
1548
   *
1549
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1550
   *
1551
   * @return string|string[] <p>Will return the fixed input-"array" or
1552
   *                         the fixed input-"string".</p>
1553
   */
1554 1
  public static function fix_utf8($str)
  {
    if (\is_array($str) !== true) {
      // Decode and re-encode repeatedly until the value stops changing,
      // i.e. until every extra layer of encoding has been removed.
      $previous = '';
      while ($previous !== $str) {
        $previous = $str;
        $decoded = self::utf8_decode($str, true);
        $str = self::to_utf8($decoded);
      }

      return $str;
    }

    // Arrays: fix every element recursively, keeping the keys.
    foreach ($str as $key => $value) {
      $str[$key] = self::fix_utf8($value);
    }

    return $str;
  }
1574
1575
  /**
1576
   * Get the text direction (left-to-right or right-to-left) of a specific character.
1577
   *
1578
   * @param string $char
1579
   *
1580
   * @return string <p>'RTL' or 'LTR'</p>
1581
   */
1582 1
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: let the intl extension classify the character and map
    // its numeric direction code to 'LTR' / 'RTL'. Any other code falls
    // through to the manual code point tables below.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // right-to-left code points in the lower block (0x5be..0x85e)
      $rtlSingles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $rtlRanges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // right-to-left code points in the upper block (0xfb1d..0x10b7f)
      $rtlSingles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $rtlRanges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two blocks (and not 0x200f): nothing is listed as RTL
      return 'LTR';

    }

    // exact matches are compared strictly, like the original "===" checks
    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as list($from, $to)) {
      if ($from <= $c && $to >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1696
1697
  /**
1698
   * Get data from "/data/*.php".
1699
   *
1700
   * @param string $file
1701
   *
1702
   * @return bool|string|array|int <p>Will return false on error.</p>
1703
   */
1704 7
  private static function getData(string $file)
  {
    // the data sets live next to this class as plain php files
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($file)) {
      return false;
    }

    // the value returned by the included file is handed back to the caller
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1714
1715
  /**
1716
   * Check for php-support.
1717
   *
1718
   * @param string|null $key
1719
   *
1720
   * @return mixed <p>Return the full support-"array", if $key === null<br>
1721
   *               return bool-value, if $key is used and available<br>
1722
   *               otherwise return null</p>
1723
   */
1724 19
  public static function getSupportInfo(string $key = null)
  {
    // run the feature detection once, on first use
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without a key the complete support-"array" is returned
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown keys result in null
    return self::$SUPPORT[$key] ?? null;
  }
1740
1741
  /**
1742
   * @param int    $length        <p>Length of the random string.</p>
1743
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
1744
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
1745
   *
1746
   * @return string
1747
   */
1748
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);

    // nothing to pick from
    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    // append one randomly selected character per iteration
    for ($n = 0; $n < $length; $n++) {
      try {
        $index = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // fall back to "mt_rand()" if "random_int()" throws an exception
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1774
1775
  /**
1776
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
1777
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
1778
   *
1779
   * @return string
1780
   */
1781
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // collect some request / process specific values as prefix
    $seed = (string)\mt_rand();
    $seed .= \session_id();
    $seed .= $_SERVER['REMOTE_ADDR'] ?? '';
    $seed .= $_SERVER['SERVER_ADDR'] ?? '';
    $seed .= $entropyExtra;

    $unique = \uniqid($seed, true);

    // optionally condense the result into a md5-hash
    return $md5 ? \md5($unique . $seed) : $unique;
  }
1797
1798
  /**
1799
   * alias for "UTF8::string_has_bom()"
1800
   *
1801
   * @see        UTF8::string_has_bom()
1802
   *
1803
   * @param string $str
1804
   *
1805
   * @return bool
1806
   *
1807
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
1808
   */
1809 1
  public static function hasBom(string $str): bool
  {
    // deprecated alias: the check itself is done by "string_has_bom()"
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1813
1814
  /**
1815
   * Returns true if the string contains a lower case char, false otherwise.
1816
   *
1817
   * @param string $str <p>The input string.</p>
1818
   *
1819
   * @return bool <p>Whether or not the string contains a lower case character.</p>
1820
   */
1821
  public static function has_lowercase(string $str): bool
  {
    // pattern handed to "matchesPattern()": anything, followed by one lower case char
    $pattern = '.*[[:lower:]]';

    return self::matchesPattern($str, $pattern);
  }
1825
1826
  /**
1827
   * Returns true if the string contains an upper case char, false otherwise.
1828
   *
1829
   * @param string $str <p>The input string.</p>
1830
   *
1831
   * @return bool <p>Whether or not the string contains an upper case character.</p>
1832
   */
1833
  public static function has_uppercase(string $str): bool
  {
    // pattern handed to "matchesPattern()": anything, followed by one upper case char
    $pattern = '.*[[:upper:]]';

    return self::matchesPattern($str, $pattern);
  }
1837
1838
  /**
1839
   * Converts a hexadecimal-value into an UTF-8 character.
1840
   *
1841
   * @param string $hexdec <p>The hexadecimal value.</p>
1842
   *
1843
   * @return string|false <p>One single UTF-8 character.</p>
1844
   */
1845 2
  public static function hex_to_chr(string $hexdec)
  {
    // hexadecimal string => decimal code point => character
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1849
1850
  /**
1851
   * Converts hexadecimal U+xxxx code point representation to integer.
1852
   *
1853
   * INFO: opposite to UTF8::int_to_hex()
1854
   *
1855
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
1856
   *
1857
   * @return int|false <p>The code point, or false on failure.</p>
1858
   */
1859 1
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // Accepted input: an optional "\u" or "U+" prefix, followed by 4 to 6
    // hexadecimal digits (case-insensitive).
    //
    // The digit class is limited to real hex digits: with a wider class
    // (letters "g" to "z") invalid input would pass the check and "intval()"
    // would turn it into 0 or into a truncated value instead of reporting
    // the failure via false.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match) === 1) {
      return \intval($match[1], 16);
    }

    return false;
  }
1871
1872
  /**
1873
   * alias for "UTF8::html_entity_decode()"
1874
   *
1875
   * @see UTF8::html_entity_decode()
1876
   *
1877
   * @param string $str
1878
   * @param int    $flags
1879
   * @param string $encoding
1880
   *
1881
   * @return string
1882
   */
1883 1
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // alias: all arguments are passed through unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1887
1888
  /**
1889
   * Converts a UTF-8 string to a series of HTML numbered entities.
1890
   *
1891
   * INFO: opposite to UTF8::html_decode()
1892
   *
1893
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1894
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1895
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
1896
   *
1897
   * @return string <p>HTML numbered entities.</p>
1898
   */
1899 3
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the converted range starts at 0x80, so
      // the ASCII chars (0x00-0x7f) are left untouched.
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // The conversion map consists of groups of four integers:
      // [start code, end code, offset, mask].
      // A fifth element does not belong to any group; newer PHP versions
      // refuse maps whose size is not a multiple of four, therefore exactly
      // one complete group is passed here.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without "mbstring": encode the string char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1934
1935
  /**
1936
   * UTF-8 version of html_entity_decode()
1937
   *
1938
   * The reason we are not using html_entity_decode() by itself is because
1939
   * while it is not technically correct to leave out the semicolon
1940
   * at the end of an entity most browsers will still interpret the entity
1941
   * correctly. html_entity_decode() does not convert entities without
1942
   * semicolons, so we are left with our own little solution here. Bummer.
1943
   *
1944
   * Convert all HTML entities to their applicable characters
1945
   *
1946
   * INFO: opposite to UTF8::html_encode()
1947
   *
1948
   * @link http://php.net/manual/en/function.html-entity-decode.php
1949
   *
1950
   * @param string $str      <p>
1951
   *                         The input string.
1952
   *                         </p>
1953
   * @param int    $flags    [optional] <p>
1954
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1955
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1956
   *                         <table>
1957
   *                         Available <i>flags</i> constants
1958
   *                         <tr valign="top">
1959
   *                         <td>Constant Name</td>
1960
   *                         <td>Description</td>
1961
   *                         </tr>
1962
   *                         <tr valign="top">
1963
   *                         <td><b>ENT_COMPAT</b></td>
1964
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1965
   *                         </tr>
1966
   *                         <tr valign="top">
1967
   *                         <td><b>ENT_QUOTES</b></td>
1968
   *                         <td>Will convert both double and single quotes.</td>
1969
   *                         </tr>
1970
   *                         <tr valign="top">
1971
   *                         <td><b>ENT_NOQUOTES</b></td>
1972
   *                         <td>Will leave both double and single quotes unconverted.</td>
1973
   *                         </tr>
1974
   *                         <tr valign="top">
1975
   *                         <td><b>ENT_HTML401</b></td>
1976
   *                         <td>
1977
   *                         Handle code as HTML 4.01.
1978
   *                         </td>
1979
   *                         </tr>
1980
   *                         <tr valign="top">
1981
   *                         <td><b>ENT_XML1</b></td>
1982
   *                         <td>
1983
   *                         Handle code as XML 1.
1984
   *                         </td>
1985
   *                         </tr>
1986
   *                         <tr valign="top">
1987
   *                         <td><b>ENT_XHTML</b></td>
1988
   *                         <td>
1989
   *                         Handle code as XHTML.
1990
   *                         </td>
1991
   *                         </tr>
1992
   *                         <tr valign="top">
1993
   *                         <td><b>ENT_HTML5</b></td>
1994
   *                         <td>
1995
   *                         Handle code as HTML 5.
1996
   *                         </td>
1997
   *                         </tr>
1998
   *                         </table>
1999
   *                         </p>
2000
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
2001
   *
2002
   * @return string <p>The decoded string.</p>
2003
   */
2004 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // fewer than 4 bytes are returned as they are, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // no ampersand => nothing to decode
    if (\strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity nor a semicolon => nothing to decode
    if (\strpos($str, '&#') === false && \strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Decodes one decimal entity; quotes are kept as entity at this point,
    // so that "\html_entity_decode()" (below) handles them according to $flags.
    $decodeDecimalEntity = static function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass does not change the string anymore
    do {
      $previous = $str;

      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          $decodeDecimalEntity,
          $str
      );

      // append the missing semicolon to numeric entities ...
      $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities
      $str = \html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2074
2075
  /**
2076
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2077
   *
2078
   * @link http://php.net/manual/en/function.htmlentities.php
2079
   *
2080
   * @param string $str           <p>
2081
   *                              The input string.
2082
   *                              </p>
2083
   * @param int    $flags         [optional] <p>
2084
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2085
   *                              invalid code unit sequences and the used document type. The default is
2086
   *                              ENT_COMPAT | ENT_HTML401.
2087
   *                              <table>
2088
   *                              Available <i>flags</i> constants
2089
   *                              <tr valign="top">
2090
   *                              <td>Constant Name</td>
2091
   *                              <td>Description</td>
2092
   *                              </tr>
2093
   *                              <tr valign="top">
2094
   *                              <td><b>ENT_COMPAT</b></td>
2095
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2096
   *                              </tr>
2097
   *                              <tr valign="top">
2098
   *                              <td><b>ENT_QUOTES</b></td>
2099
   *                              <td>Will convert both double and single quotes.</td>
2100
   *                              </tr>
2101
   *                              <tr valign="top">
2102
   *                              <td><b>ENT_NOQUOTES</b></td>
2103
   *                              <td>Will leave both double and single quotes unconverted.</td>
2104
   *                              </tr>
2105
   *                              <tr valign="top">
2106
   *                              <td><b>ENT_IGNORE</b></td>
2107
   *                              <td>
2108
   *                              Silently discard invalid code unit sequences instead of returning
2109
   *                              an empty string. Using this flag is discouraged as it
2110
   *                              may have security implications.
2111
   *                              </td>
2112
   *                              </tr>
2113
   *                              <tr valign="top">
2114
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2115
   *                              <td>
2116
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2117
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2118
   *                              </td>
2119
   *                              </tr>
2120
   *                              <tr valign="top">
2121
   *                              <td><b>ENT_DISALLOWED</b></td>
2122
   *                              <td>
2123
   *                              Replace invalid code points for the given document type with a
2124
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2125
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2126
   *                              instance, to ensure the well-formedness of XML documents with
2127
   *                              embedded external content.
2128
   *                              </td>
2129
   *                              </tr>
2130
   *                              <tr valign="top">
2131
   *                              <td><b>ENT_HTML401</b></td>
2132
   *                              <td>
2133
   *                              Handle code as HTML 4.01.
2134
   *                              </td>
2135
   *                              </tr>
2136
   *                              <tr valign="top">
2137
   *                              <td><b>ENT_XML1</b></td>
2138
   *                              <td>
2139
   *                              Handle code as XML 1.
2140
   *                              </td>
2141
   *                              </tr>
2142
   *                              <tr valign="top">
2143
   *                              <td><b>ENT_XHTML</b></td>
2144
   *                              <td>
2145
   *                              Handle code as XHTML.
2146
   *                              </td>
2147
   *                              </tr>
2148
   *                              <tr valign="top">
2149
   *                              <td><b>ENT_HTML5</b></td>
2150
   *                              <td>
2151
   *                              Handle code as HTML 5.
2152
   *                              </td>
2153
   *                              </tr>
2154
   *                              </table>
2155
   *                              </p>
2156
   * @param string $encoding      [optional] <p>
2157
   *                              Like <b>htmlspecialchars</b>,
2158
   *                              <b>htmlentities</b> takes an optional third argument
2159
   *                              <i>encoding</i> which defines encoding used in
2160
   *                              conversion.
2161
   *                              Although this argument is technically optional, you are highly
2162
   *                              encouraged to specify the correct value for your code.
2163
   *                              </p>
2164
   * @param bool   $double_encode [optional] <p>
2165
   *                              When <i>double_encode</i> is turned off PHP will not
2166
   *                              encode existing html entities. The default is to convert everything.
2167
   *                              </p>
2168
   *
2169
   *
2170
   * @return string the encoded string.
2171
   * </p>
2172
   * <p>
2173
   * If the input <i>string</i> contains an invalid code unit
2174
   * sequence within the given <i>encoding</i> an empty string
2175
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2176
   * <b>ENT_SUBSTITUTE</b> flags are set.
2177
   */
2178 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP does not replace a backslash with its html entity, because the
     * backslash is mostly used to escape characters before inserting them
     * into a database. That is not needed here, so every backslash is
     * replaced by its html entity equivalent.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // finally convert the remaining non-ASCII chars into numbered entities
    return self::html_encode($encoded, true, $encoding);
  }
2198
2199
  /**
2200
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2201
   *
2202
   * INFO: Take a look at "UTF8::htmlentities()"
2203
   *
2204
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2205
   *
2206
   * @param string $str           <p>
2207
   *                              The string being converted.
2208
   *                              </p>
2209
   * @param int    $flags         [optional] <p>
2210
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2211
   *                              invalid code unit sequences and the used document type. The default is
2212
   *                              ENT_COMPAT | ENT_HTML401.
2213
   *                              <table>
2214
   *                              Available <i>flags</i> constants
2215
   *                              <tr valign="top">
2216
   *                              <td>Constant Name</td>
2217
   *                              <td>Description</td>
2218
   *                              </tr>
2219
   *                              <tr valign="top">
2220
   *                              <td><b>ENT_COMPAT</b></td>
2221
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2222
   *                              </tr>
2223
   *                              <tr valign="top">
2224
   *                              <td><b>ENT_QUOTES</b></td>
2225
   *                              <td>Will convert both double and single quotes.</td>
2226
   *                              </tr>
2227
   *                              <tr valign="top">
2228
   *                              <td><b>ENT_NOQUOTES</b></td>
2229
   *                              <td>Will leave both double and single quotes unconverted.</td>
2230
   *                              </tr>
2231
   *                              <tr valign="top">
2232
   *                              <td><b>ENT_IGNORE</b></td>
2233
   *                              <td>
2234
   *                              Silently discard invalid code unit sequences instead of returning
2235
   *                              an empty string. Using this flag is discouraged as it
2236
   *                              may have security implications.
2237
   *                              </td>
2238
   *                              </tr>
2239
   *                              <tr valign="top">
2240
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2241
   *                              <td>
2242
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2243
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2244
   *                              </td>
2245
   *                              </tr>
2246
   *                              <tr valign="top">
2247
   *                              <td><b>ENT_DISALLOWED</b></td>
2248
   *                              <td>
2249
   *                              Replace invalid code points for the given document type with a
2250
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2251
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2252
   *                              instance, to ensure the well-formedness of XML documents with
2253
   *                              embedded external content.
2254
   *                              </td>
2255
   *                              </tr>
2256
   *                              <tr valign="top">
2257
   *                              <td><b>ENT_HTML401</b></td>
2258
   *                              <td>
2259
   *                              Handle code as HTML 4.01.
2260
   *                              </td>
2261
   *                              </tr>
2262
   *                              <tr valign="top">
2263
   *                              <td><b>ENT_XML1</b></td>
2264
   *                              <td>
2265
   *                              Handle code as XML 1.
2266
   *                              </td>
2267
   *                              </tr>
2268
   *                              <tr valign="top">
2269
   *                              <td><b>ENT_XHTML</b></td>
2270
   *                              <td>
2271
   *                              Handle code as XHTML.
2272
   *                              </td>
2273
   *                              </tr>
2274
   *                              <tr valign="top">
2275
   *                              <td><b>ENT_HTML5</b></td>
2276
   *                              <td>
2277
   *                              Handle code as HTML 5.
2278
   *                              </td>
2279
   *                              </tr>
2280
   *                              </table>
2281
   *                              </p>
2282
   * @param string $encoding      [optional] <p>
2283
   *                              Defines encoding used in conversion.
2284
   *                              </p>
2285
   *                              <p>
2286
   *                              For the purposes of this function, the encodings
2287
   *                              ISO-8859-1, ISO-8859-15,
2288
   *                              UTF-8, cp866,
2289
   *                              cp1251, cp1252, and
2290
   *                              KOI8-R are effectively equivalent, provided the
2291
   *                              <i>string</i> itself is valid for the encoding, as
2292
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2293
   *                              the same positions in all of these encodings.
2294
   *                              </p>
2295
   * @param bool   $double_encode [optional] <p>
2296
   *                              When <i>double_encode</i> is turned off PHP will not
2297
   *                              encode existing html entities, the default is to convert everything.
2298
   *                              </p>
2299
   *
2300
   * @return string The converted string.
2301
   * </p>
2302
   * <p>
2303
   * If the input <i>string</i> contains an invalid code unit
2304
   * sequence within the given <i>encoding</i> an empty string
2305
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2306
   * <b>ENT_SUBSTITUTE</b> flags are set.
2307
   */
2308 1 View Code Duplication
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $needsNormalization = ($encoding !== 'UTF-8' && $encoding !== 'CP850');

    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2316
2317
  /**
2318
   * Checks whether iconv is available on the server.
2319
   *
2320
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2321
   */
2322 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already returns a bool, so it is returned directly
    // (same shape as intl_loaded()).
    return \extension_loaded('iconv');
  }
2326
2327
  /**
2328
   * alias for "UTF8::decimal_to_chr()"
2329
   *
2330
   * @see UTF8::decimal_to_chr()
2331
   *
2332
   * @param mixed $int
2333
   *
2334
   * @return string
2335
   */
2336 2
  public static function int_to_chr($int): string
2337
  {
2338 2
    // Thin alias: hands $int to decimal_to_chr() unchanged and returns its result.
    return self::decimal_to_chr($int);
2339
  }
2340
2341
  /**
2342
   * Converts Integer to hexadecimal U+xxxx code point representation.
2343
   *
2344
   * INFO: opposite to UTF8::hex_to_int()
2345
   *
2346
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2347
   * @param string $pfix [optional]
2348
   *
2349
   * @return string <p>The code point, or empty string on failure.</p>
2350
   */
2351 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // dechex() gives lower-case hex digits; left-pad with zeros to at least
    // four digits. Longer values pass through unchanged.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2359
2360
  /**
2361
   * Checks whether intl-char is available on the server.
2362
   *
2363
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2364
   */
2365 1
  public static function intlChar_loaded(): bool
2366
  {
2367 1
    // Probes for the IntlChar class by name; class_exists() may invoke the autoloader.
    return \class_exists('IntlChar');
2368
  }
2369
2370
  /**
2371
   * Checks whether intl is available on the server.
2372
   *
2373
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2374
   */
2375 4
  public static function intl_loaded(): bool
2376
  {
2377 4
    // True when the "intl" extension is loaded in the running PHP process.
    return \extension_loaded('intl');
2378
  }
2379
2380
  /**
2381
   * alias for "UTF8::is_ascii()"
2382
   *
2383
   * @see        UTF8::is_ascii()
2384
   *
2385
   * @param string $str
2386
   *
2387
   * @return boolean
2388
   *
2389
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2390
   */
2391 1
  public static function isAscii(string $str): bool
2392
  {
2393 1
    // Deprecated camelCase alias: forwards $str to is_ascii() unchanged.
    return self::is_ascii($str);
2394
  }
2395
2396
  /**
2397
   * alias for "UTF8::is_base64()"
2398
   *
2399
   * @see        UTF8::is_base64()
2400
   *
2401
   * @param string $str
2402
   *
2403
   * @return bool
2404
   *
2405
   * @deprecated <p>use "UTF8::is_base64()"</p>
2406
   */
2407 1
  public static function isBase64(string $str): bool
2408
  {
2409 1
    // Deprecated camelCase alias: forwards $str to is_base64() unchanged.
    return self::is_base64($str);
2410
  }
2411
2412
  /**
2413
   * alias for "UTF8::is_binary()"
2414
   *
2415
   * @see        UTF8::is_binary()
2416
   *
2417
   * @param mixed $str
2418
   * @param bool  $strict
2419
   *
2420
   * @return bool
2421
   *
2422
   * @deprecated <p>use "UTF8::is_binary()"</p>
2423
   */
2424 2
  public static function isBinary($str, $strict = false): bool
2425
  {
2426 2
    // Deprecated camelCase alias for is_binary(). $strict is untyped here but
    // is_binary() declares it as bool, and this file uses strict_types, so a
    // non-bool $strict raises a TypeError at the inner call.
    return self::is_binary($str, $strict);
2427
  }
2428
2429
  /**
2430
   * alias for "UTF8::is_bom()"
2431
   *
2432
   * @see        UTF8::is_bom()
2433
   *
2434
   * @param string $utf8_chr
2435
   *
2436
   * @return boolean
2437
   *
2438
   * @deprecated <p>use "UTF8::is_bom()"</p>
2439
   */
2440 1
  public static function isBom(string $utf8_chr): bool
2441
  {
2442 1
    // Deprecated camelCase alias: forwards $utf8_chr to is_bom() unchanged.
    return self::is_bom($utf8_chr);
2443
  }
2444
2445
  /**
2446
   * alias for "UTF8::is_html()"
2447
   *
2448
   * @see        UTF8::is_html()
2449
   *
2450
   * @param string $str
2451
   *
2452
   * @return boolean
2453
   *
2454
   * @deprecated <p>use "UTF8::is_html()"</p>
2455
   */
2456 1
  public static function isHtml(string $str): bool
2457
  {
2458 1
    // Deprecated camelCase alias: forwards $str to is_html() unchanged.
    return self::is_html($str);
2459
  }
2460
2461
  /**
2462
   * alias for "UTF8::is_json()"
2463
   *
2464
   * @see        UTF8::is_json()
2465
   *
2466
   * @param string $str
2467
   *
2468
   * @return bool
2469
   *
2470
   * @deprecated <p>use "UTF8::is_json()"</p>
2471
   */
2472
  public static function isJson(string $str): bool
2473
  {
2474
    // Deprecated camelCase alias: forwards $str to is_json() unchanged.
    return self::is_json($str);
2475
  }
2476
2477
  /**
2478
   * alias for "UTF8::is_utf16()"
2479
   *
2480
   * @see        UTF8::is_utf16()
2481
   *
2482
   * @param string $str
2483
   *
2484
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2485
   *
2486
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2487
   */
2488 1
  public static function isUtf16(string $str)
2489
  {
2490 1
    // Deprecated camelCase alias for is_utf16(); its result (false, 1 or 2)
    // is returned as-is, hence no native return type.
    return self::is_utf16($str);
2491
  }
2492
2493
  /**
2494
   * alias for "UTF8::is_utf32()"
2495
   *
2496
   * @see        UTF8::is_utf32()
2497
   *
2498
   * @param string $str
2499
   *
2500
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2501
   *
2502
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2503
   */
2504 1
  public static function isUtf32(string $str)
2505
  {
2506 1
    // Deprecated camelCase alias for is_utf32(); its result (false, 1 or 2)
    // is returned as-is, hence no native return type.
    return self::is_utf32($str);
2507
  }
2508
2509
  /**
2510
   * alias for "UTF8::is_utf8()"
2511
   *
2512
   * @see        UTF8::is_utf8()
2513
   *
2514
   * @param string $str
2515
   * @param bool   $strict
2516
   *
2517
   * @return bool
2518
   *
2519
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2520
   */
2521 16
  public static function isUtf8($str, $strict = false): bool
2522
  {
2523 16
    // Deprecated camelCase alias for is_utf8(). $strict is untyped here but
    // is_utf8() declares it as bool, and this file uses strict_types, so a
    // non-bool $strict raises a TypeError at the inner call.
    return self::is_utf8($str, $strict);
2524
  }
2525
2526
  /**
2527
   * Returns true if the string contains only alphabetic chars, false otherwise.
2528
   *
2529
   * @param string $str
2530
   *
2531
   * @return bool <p>Whether or not $str contains only alphabetic chars.</p>
2532
   */
2533
  public static function is_alpha(string $str): bool
2534
  {
2535
    // [[:alpha:]] is the POSIX alphabetic class; because of the "*" quantifier
    // the pattern also accepts an empty string. How the pattern is applied
    // (delimiters, modifiers) is decided by matchesPattern(), not shown here.
    return self::matchesPattern($str, '^[[:alpha:]]*$');
2536
  }
2537
2538
  /**
2539
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2540
   *
2541
   * @param string $str
2542
   *
2543
   * @return bool <p>Whether or not $str contains only alphanumeric chars.</p>
2544
   */
2545
  public static function is_alphanumeric(string $str): bool
2546
  {
2547
    // [[:alnum:]] is the POSIX letters-and-digits class; because of the "*"
    // quantifier the pattern also accepts an empty string.
    return self::matchesPattern($str, '^[[:alnum:]]*$');
2548
  }
2549
2550
  /**
2551
   * Checks if a string is 7 bit ASCII.
2552
   *
2553
   * @param string $str <p>The string to check.</p>
2554
   *
2555
   * @return bool <p>
2556
   *              <strong>true</strong> if it is ASCII<br>
2557
   *              <strong>false</strong> otherwise
2558
   *              </p>
2559
   */
2560 58
  public static function is_ascii(string $str): bool
  {
    // An empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // Whitelist: TAB, LF, CR, the printable range 0x20-0x7E, plus 0x10 and 0x13.
    // NOTE(review): \x10 (DLE) and \x13 (DC3) look like decimal 10/13 written
    // as hex; LF/CR are already present as \x0A/\x0D - confirm the intent.
    $foundOther = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOther;
  }
2568
2569
  /**
2570
   * Returns true if the string is base64 encoded, false otherwise.
2571
   *
2572
   * @param string $str <p>The input string.</p>
2573
   *
2574
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2575
   */
2576 1
  public static function is_base64(string $str): bool
  {
    // Strict mode: base64_decode() returns false when $str contains characters
    // outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare explicitly against false/'' instead of relying on truthiness:
    // the decoded payload "0" is falsy in PHP, so a valid input such as
    // "MA==" used to be rejected. An empty input is still reported as false.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Re-encoding must reproduce the input exactly (canonical padding etc.).
    return \base64_encode($decoded) === $str;
  }
2582
2583
  /**
2584
   * Check if the input is binary... (this looks like a hack).
2585
   *
2586
   * @param mixed $input
2587
   * @param bool  $strict
2588
   *
2589
   * @return bool
2590
   */
2591 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;

    // Nothing to inspect.
    if ($input === '') {
      return false;
    }

    // A string made only of the characters "0" and "1" counts as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // More than 30% NUL bytes counts as binary.
    $nulCount = \substr_count($input, "\x0");
    if (($nulCount / \strlen($input)) > 0.3) {
      return true;
    }

    // Strict mode with fileinfo available: the verdict of libmagic is final,
    // the NUL-byte fallback below is not consulted in that case.
    if ($strict === true && \class_exists('finfo')) {
      $detector = new \finfo(FILEINFO_MIME_ENCODING);

      return $detector->buffer($input) === 'binary';
    }

    // Otherwise a single NUL byte is enough.
    return $nulCount > 0;
  }
2631
2632
  /**
2633
   * Check if the file is binary.
2634
   *
2635
   * @param string $file
2636
   *
2637
   * @return boolean
2638
   */
2639 3
  public static function is_binary_file($file): bool
  {
    // Only the first 512 bytes of the file are inspected.
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false (plus a warning), not by
      // throwing, so the result must be checked before it is used.
      $fp = \fopen($file, 'rb');
      if ($fp !== false) {
        $chunk = \fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Best effort, as before: e.g. an error handler that converts warnings
      // into exceptions. The file is then treated as empty.
      $block = '';
    } finally {
      // Release the handle even when reading failed.
      if (\is_resource($fp)) {
        \fclose($fp);
      }
    }

    return self::is_binary($block, true);
  }
2651
2652
  /**
2653
   * Returns true if the string contains only whitespace chars, false otherwise.
2654
   *
2655
   * @param string $str
2656
   *
2657
   * @return bool <p>Whether or not $str contains only whitespace characters.</p>
2658
   */
2659
  public static function is_blank(string $str): bool
2660
  {
2661
    // [[:space:]] is the POSIX whitespace class; because of the "*" quantifier
    // the pattern also accepts an empty string.
    return self::matchesPattern($str, '^[[:space:]]*$');
2662
  }
2663
2664
  /**
2665
   * Checks if the given string is equal to any "Byte Order Mark".
2666
   *
2667
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2668
   *
2669
   * @param string $str <p>The input string.</p>
2670
   *
2671
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2672
   */
2673 1
  public static function is_bom($str): bool
  {
    // The BOM table is keyed by the BOM byte string itself, so an exact,
    // type-strict match is a plain key lookup. Non-strings can never match.
    return \is_string($str) && \array_key_exists($str, self::$BOM);
  }
2683
2684
  /**
2685
   * Determine whether the string is considered to be empty.
2686
   *
2687
   * A variable is considered empty if it does not exist or if its value equals FALSE.
2688
   * empty() does not generate a warning if the variable does not exist.
2689
   *
2690
   * @param mixed $str
2691
   *
2692
   * @return bool <p>Whether or not $str is empty().</p>
2693
   */
2694
  public static function is_empty($str): bool
2695
  {
2696
    // empty() is true for '', '0', 0, 0.0, null, false and [] - so the
    // string "0" counts as empty here.
    return empty($str);
2697
  }
2698
2699
  /**
2700
   * Returns true if the string contains only hexadecimal chars, false otherwise.
2701
   *
2702
   * @param string $str
2703
   *
2704
   * @return bool <p>Whether or not $str contains only hexadecimal chars.</p>
2705
   */
2706
  public static function is_hexadecimal(string $str): bool
2707
  {
2708
    // [[:xdigit:]] is the POSIX hex-digit class (0-9, a-f, A-F); because of
    // the "*" quantifier the pattern also accepts an empty string.
    return self::matchesPattern($str, '^[[:xdigit:]]*$');
2709
  }
2710
2711
  /**
2712
   * Check if the string contains any html-tags <lall>.
2713
   *
2714
   * @param string $str <p>The input string.</p>
2715
   *
2716
   * @return boolean
2717
   */
2718 1
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Matches one opening, closing or self-closing tag, optionally carrying
    // attributes with double-quoted, single-quoted or unquoted values.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // One match anywhere in the string is enough.
    return \preg_match($tagPattern, $str) === 1;
  }
2731
2732
  /**
2733
   * Try to check if "$str" is a JSON string.
2734
   *
2735
   * @param string $str <p>The input string.</p>
2736
   *
2737
   * @return bool
2738
   */
2739 1
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // The decoder must not have reported an error ...
    if (\json_last_error() !== JSON_ERROR_NONE) {
      return false;
    }

    // ... and only containers (object / array) are accepted; bare scalars
    // and null are not.
    return \is_object($decoded) || \is_array($decoded);
  }
2755
2756
  /**
   * Returns true if the string contains only lower case chars, false otherwise.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str contains only lower case characters.</p>
   */
  public static function is_lowercase(string $str): bool
  {
    // Return the match result directly, mirroring is_uppercase().
    // [[:lower:]] is the POSIX lower-case class; "*" also accepts ''.
    return self::matchesPattern($str, '^[[:lower:]]*$');
  }
2769
2770
  /**
2771
   * Returns true if the string is serialized, false otherwise.
2772
   *
2773
   * @param string $str
2774
   *
2775
   * @return bool <p>Whether or not $str is serialized.</p>
2776
   */
2777
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, so that one payload is recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // The input is only probed, never used, so no class may be instantiated:
    // with allowed_classes => false objects come back as __PHP_Incomplete_Class
    // and no __wakeup()/__destruct() of application classes can be triggered
    // by a crafted string. "@" hides the notice PHP emits for malformed input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2789
2790
  /**
2791
   * Returns true if the string contains only upper case chars, false
2792
   * otherwise.
2793
   *
2794
   * @param string $str <p>The input string.</p>
2795
   *
2796
   * @return bool <p>Whether or not $str contains only upper case characters.</p>
2797
   */
2798
  public static function is_uppercase(string $str): bool
2799
  {
2800
    // [[:upper:]] is the POSIX upper-case class; because of the "*" quantifier
    // the pattern also accepts an empty string.
    return self::matchesPattern($str, '^[[:upper:]]*$');
2801
  }
2802
2803
  /**
2804
   * Check if the string is UTF-16.
2805
   *
2806
   * @param string $str <p>The input string.</p>
2807
   *
2808
   * @return int|false <p>
2809
   *                   <strong>false</strong> if it's not UTF-16,<br>
2810
   *                   <strong>1</strong> for UTF-16LE,<br>
2811
   *                   <strong>2</strong> for UTF-16BE.
2812
   *                   </p>
2813
   */
2814 10 View Code Duplication
  public static function is_utf16(string $str)
  {
    // Only input that is_binary() flags is examined at all.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character table of the BOM-less input, filled lazily on first use.
    $strChars = [];

    // One score per byte order, evaluated in this order: LE first, then BE.
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $byteOrder) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$decoded) {
        continue;
      }

      // Round trip UTF-8 -> UTF-16 -> UTF-8 has to reproduce the decoded text.
      $reEncoded = \mb_convert_encoding($decoded, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() searches the VALUES of $strChars while the
      // loop variable is a KEY of count_chars()' result - confirm against
      // count_chars() that this comparison is the intended one.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-16".
    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
2869
2870
  /**
2871
   * Check if the string is UTF-32.
2872
   *
2873
   * @param string $str
2874
   *
2875
   * @return int|false <p>
2876
   *                   <strong>false</strong> if it's not UTF-32,<br>
2877
   *                   <strong>1</strong> for UTF-32LE,<br>
2878
   *                   <strong>2</strong> for UTF-32BE.
2879
   *                   </p>
2880
   */
2881 8 View Code Duplication
  public static function is_utf32(string $str)
  {
    // Only input that is_binary() flags is examined at all.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character table of the BOM-less input, filled lazily on first use.
    $strChars = [];

    // One score per byte order, evaluated in this order: LE first, then BE.
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $byteOrder) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$decoded) {
        continue;
      }

      // Round trip UTF-8 -> UTF-32 -> UTF-8 has to reproduce the decoded text.
      $reEncoded = \mb_convert_encoding($decoded, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): in_array() searches the VALUES of $strChars while the
      // loop variable is a KEY of count_chars()' result - confirm against
      // count_chars() that this comparison is the intended one.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-32".
    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
2936
2937
  /**
2938
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2939
   *
2940
   * @see    http://hsivonen.iki.fi/php-utf8/
2941
   *
2942
   * @param string|string[] $str    <p>The string to be checked.</p>
2943
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2944
   *
2945
   * @return bool
2946
   */
2947 61
  public static function is_utf8($str, bool $strict = false): bool
  {
    // An array is valid only when every element is valid.
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // Empty input (anything without an offset 0) counts as valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: input that is detected as UTF-16 / UTF-32 is rejected.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the "/u" shortcut below runs when pcre_utf8_support() is
    // NOT true, which reads inverted for a helper of that name - confirm
    // against pcre_utf8_support() before relying on this branch.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Lookup table loaded via getData('ord'); used below to turn one byte
    // of $str into the integer that is bit-tested.
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or the
        // first octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the
        // multi-octet sequence.
        if (0x80 !== (0xC0 & $in)) {
          // Not a continuation octet: the sequence is incomplete.
          return false;
        }

        // Legal continuation: merge its six payload bits.
        $shift = ($mState - 1) * 6;
        $mUcs4 |= ($in & 0x0000003F) << $shift;

        // End of the multi-octet sequence: mUcs4 now holds the final code
        // point, check it for illegal forms.
        if (0 === --$mState) {
          if (
              // From Unicode 3.1, non-shortest form is illegal.
              (2 === $mBytes && $mUcs4 < 0x0080) ||
              (3 === $mBytes && $mUcs4 < 0x0800) ||
              (4 === $mBytes && $mUcs4 < 0x10000) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal.
              (($mUcs4 & 0xFFFFF800) === 0xD800) ||
              // Code points outside the Unicode range are illegal.
              ($mUcs4 > 0x10FFFF)
          ) {
            return false;
          }

          // Reset the decoder for the next character.
          $mState = 0;
          $mUcs4 = 0;
          $mBytes = 1;
        }
      }
    }

    // A non-zero state here means the input ended in the middle of a
    // multi-octet sequence (e.g. "abc\xC3"); that is not valid UTF-8.
    return $mState === 0;
  }
3095
3096
  /**
3097
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3098
   * Decodes a JSON string
3099
   *
3100
   * @link http://php.net/manual/en/function.json-decode.php
3101
   *
3102
   * @param string $json    <p>
3103
   *                        The <i>json</i> string being decoded.
3104
   *                        </p>
3105
   *                        <p>
3106
   *                        This function only works with UTF-8 encoded strings.
3107
   *                        </p>
3108
   *                        <p>PHP implements a superset of
3109
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3110
   *                        only supports these values when they are nested inside an array or an object.
3111
   *                        </p>
3112
   * @param bool   $assoc   [optional] <p>
3113
   *                        When <b>TRUE</b>, returned objects will be converted into
3114
   *                        associative arrays.
3115
   *                        </p>
3116
   * @param int    $depth   [optional] <p>
3117
   *                        User specified recursion depth.
3118
   *                        </p>
3119
   * @param int    $options [optional] <p>
3120
   *                        Bitmask of JSON decode options. Currently only
3121
   *                        <b>JSON_BIGINT_AS_STRING</b>
3122
   *                        is supported (default is to cast large integers as floats)
3123
   *                        </p>
3124
   *
3125
   * @return mixed the value encoded in <i>json</i> in appropriate
3126
   * PHP type. Values true, false and
3127
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3128
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3129
   * <i>json</i> cannot be decoded or if the encoded
3130
   * data is deeper than the recursion limit.
3131
   */
3132 2
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // The raw input goes through filter() first; the native decoder then
    // receives the filtered value together with the caller's options.
    $filtered = self::filter($json);

    return \json_decode($filtered, $assoc, $depth, $options);
  }
3140
3141
  /**
3142
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3143
   * Returns the JSON representation of a value.
3144
   *
3145
   * @link http://php.net/manual/en/function.json-encode.php
3146
   *
3147
   * @param mixed $value   <p>
3148
   *                       The <i>value</i> being encoded. Can be any type except
3149
   *                       a resource.
3150
   *                       </p>
3151
   *                       <p>
3152
   *                       All string data must be UTF-8 encoded.
3153
   *                       </p>
3154
   *                       <p>PHP implements a superset of
3155
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3156
   *                       only supports these values when they are nested inside an array or an object.
3157
   *                       </p>
3158
   * @param int   $options [optional] <p>
3159
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3160
   *                       <b>JSON_HEX_TAG</b>,
3161
   *                       <b>JSON_HEX_AMP</b>,
3162
   *                       <b>JSON_HEX_APOS</b>,
3163
   *                       <b>JSON_NUMERIC_CHECK</b>,
3164
   *                       <b>JSON_PRETTY_PRINT</b>,
3165
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3166
   *                       <b>JSON_FORCE_OBJECT</b>,
3167
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3168
   *                       constants is described on
3169
   *                       the JSON constants page.
3170
   *                       </p>
3171
   * @param int   $depth   [optional] <p>
3172
   *                       Set the maximum depth. Must be greater than zero.
3173
   *                       </p>
3174
   *
3175
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3176
   */
3177 2
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Returns string|false. There is deliberately no native ": string" return
    // type: \json_encode() yields false on failure, which under strict_types
    // would surface as a TypeError instead of the documented FALSE.
    $value = self::filter($value);

    return \json_encode($value, $options, $depth);
  }
3185
3186
  /**
3187
   * Returns the last $n characters of the string.
3188
   *
3189
   * @param string $str      <p>The input string.</p>
3190
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
3191
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3192
   *
3193
   * @return string
3194
   */
3195
  public static function last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Nothing to take for a zero or negative count.
    if ($n <= 0) {
      return '';
    }

    // substr() may return false (lcfirst() guards against exactly that);
    // the cast keeps the declared string return type intact in that case.
    return (string)self::substr($str, -$n, null, $encoding);
  }
3203
3204
  /**
3205
   * Makes string's first char lowercase.
3206
   *
3207
   * @param string $str       <p>The input string</p>
3208
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
3209
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3210
   *
3211
   * @return string <p>The resulting string</p>
3212
   */
3213 7
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; a false result becomes ''.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = ($tail === false) ? '' : $tail;

    // The first character, lower-cased with the same encoding settings.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3228
3229
  /**
3230
   * alias for "UTF8::lcfirst()"
3231
   *
3232
   * @see UTF8::lcfirst()
3233
   *
3234
   * @param string $word
3235
   * @param string $encoding
3236
   * @param bool   $cleanUtf8
3237
   *
3238
   * @return string
3239
   */
3240 1
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
3241
  {
3242 1
    // Thin alias: forwards all three arguments to lcfirst() unchanged.
    return self::lcfirst($word, $encoding, $cleanUtf8);
3243
  }
3244
3245
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via UTF8::str_to_words(), each non-empty part that is not listed in
   * $exceptions is passed through UTF8::lcfirst(), and the parts are joined back together.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose "!$str" would also throw away the string "0".
    if ($str === '') {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip empty parts only; a part consisting of "0" is real content and must survive.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3294
3295
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The shared leading characters; empty if the first characters already differ.</p>
   */
  public static function longestCommonPrefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only positions that exist in both strings can be part of a common prefix.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      $char = (string)self::substr($str, $i, 1, $encoding);

      // Strict comparison on strings: avoids PHP's loose "==" juggling (e.g. false == '0').
      if ($char !== (string)self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
3321
3322
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if there is none.</p>
   */
  public static function longestCommonSubstring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Slice each string into characters once, instead of re-slicing both
    // strings on every iteration of the nested loops below.
    $strChars = [];
    for ($i = 0; $i < $strLength; $i++) {
      $strChars[$i] = (string)self::substr($str, $i, 1, $encoding);
    }

    $otherChars = [];
    for ($j = 0; $j < $otherLength; $j++) {
      $otherChars[$j] = (string)self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // position in $str just behind the best match

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // pre-filled with 0, so mismatching cells need no explicit write.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      $strChar = $strChars[$i - 1];

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar !== $otherChars[$j - 1]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // Strictly greater: on ties the first occurrence is kept.
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    // Cast: self::substr() is guarded against false elsewhere in this class.
    return (string)self::substr(
        $str,
        $end - $len,
        $len,
        $encoding
    );
  }
3376
3377
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The shared trailing characters; empty if the last characters already differ.</p>
   */
  public static function longestCommonSuffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only as many trailing positions as the shorter string has can match.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // Negative offset: the $i-th character counted from the end.
      $char = (string)self::substr($str, -$i, 1, $encoding);

      // Strict comparison on strings: avoids PHP's loose "==" juggling (e.g. false == '0').
      if ($char !== (string)self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      // Walking backwards, so each match is prepended.
      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
3403
3404
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped. When omitted (INF) or empty,
   *                      Unicode separator and "other" characters (\pZ, \pC) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "Empty" must not include the char list '0' (or 0): a plain "!$chars" would treat it
    // as "not given" and strip whitespace instead of zeros.
    if ($chars === INF || (!$chars && !\is_numeric($chars))) {
      $pattern = "^[\pZ\pC]+";
    } else {
      // Escape the list so it is safe inside a character class with '/' as delimiter.
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
3428
3429
  /**
   * Tells whether $str matches the supplied regex pattern.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool <p>Whether or not $str matches the pattern.</p>
   */
  public static function matchesPattern(string $str, string $pattern): bool
  {
    $regex = '/' . $pattern . '/u';

    // preg_match() yields 1 on a match; 0 and false both count as "no match".
    return (bool)\preg_match($regex, $str);
  }
3445
3446
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string
   *                if the input yields no code points.</p>
   */
  public static function max($arg): string
  {
    // An array of strings is treated as one concatenated string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // \max() must not be called with an empty array (ValueError on PHP 8,
    // warning + false on PHP 7), so bail out when there is nothing to compare.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\max($codePoints));
  }
3461
3462
  /**
   * Determines the largest byte length of any single character in the given string,
   * based on the list produced by UTF8::chr_size_list().
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3479
3480
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3495
3496 1
  /**
   * Checks whether the "mbstring.func_overload" INI setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> when the MB_OVERLOAD_STRING constant is not defined
   *              or the bit is not set, <strong>true</strong> otherwise.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /**
     * @noinspection PhpUsageOfSilenceOperatorInspection
     *
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3507
3508
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string
   *                if the input yields no code points.</p>
   */
  public static function min($arg): string
  {
    // An array of strings is treated as one concatenated string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // \min() must not be called with an empty array (ValueError on PHP 8,
    // warning + false on PHP 7), so bail out when there is nothing to compare.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\min($codePoints));
  }
3523
3524
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>Passed through to UTF8::normalize_encoding().</p>
   * @param mixed  $fallback <p>Passed through to UTF8::normalize_encoding().</p>
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3540
3541
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input returns $fallback; "UTF-8"/"UTF8" short-circuit to "UTF-8";
   * then the per-request cache; then the known encodings list (exact match); finally the
   * upper-cased name, stripped of everything except letters, digits and whitespace, is
   * looked up in an alias table. Names without an alias are returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    // Fast path for the by far most common input.
    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Lazy-load the list of known encoding names.
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // Already a known name: keep it exactly as given.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Lookup key for the alias table; the cast keeps it a string even if preg_replace() fails.
    $encodingUpperHelper = (string)\preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Known alias: use the canonical name; otherwise keep the upper-cased input.
    if (isset($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the name exactly as the caller passed it.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3658
3659
  /**
   * Replaces MS Word special characters according to the "utf8_msword" data table:
   * every key of that table found in the string is substituted by its value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    // Search/replace lists are derived once and kept for subsequent calls.
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($str === '') {
      return '';
    }

    if ($SEARCH_CACHE === null) {
      // Lazy-load the mapping table on first use.
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $SEARCH_CACHE = \array_keys(self::$UTF8_MSWORD);
      $REPLACE_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
  }
3687
3688
  /**
   * Replaces every character from the whitespace table with a plain space and, unless
   * told otherwise, removes the characters from the bidirectional-control table.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // One search list per $keepNonBreakingSpace mode (index 0 / 1).
    static $WHITESPACE_BY_MODE = [];
    static $BIDI_CONTROLS = null;

    if ($str === '') {
      return '';
    }

    $mode = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_BY_MODE[$mode])) {
      $table = self::$WHITESPACE_TABLE;

      // Leave non-breaking spaces alone by dropping them from the search list.
      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_BY_MODE[$mode] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_CONTROLS === null) {
        $BIDI_CONTROLS = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // Bidi controls are removed entirely, not replaced by a space.
      $str = \str_replace($BIDI_CONTROLS, '', $str);
    }

    return \str_replace($WHITESPACE_BY_MODE[$mode], ' ', $str);
  }
3730
3731
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per call-argument combination (original character + encoding name).
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $CODE_POINT_CACHE = [];

    // $chr stays untouched for the cache key; $utf8Char is what gets decoded.
    $utf8Char = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $utf8Char = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $chr . $encoding;
    if (isset($CODE_POINT_CACHE[$cacheKey]) === true) {
      return $CODE_POINT_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer IntlChar when available; a falsy result falls through to manual decoding.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($utf8Char);
      if ($intlCode) {
        return $CODE_POINT_CACHE[$cacheKey] = $intlCode;
      }
    }

    // Manual decoding: look at up to the first four bytes (1-indexed array from unpack()).
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = \unpack('C*', (string)self::substr($utf8Char, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode): the lead byte is the result
      $codePoint = $lead;
    }

    return $CODE_POINT_CACHE[$cacheKey] = $codePoint;
  }
3796
3797
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never (re-)places variables in the current scope;
   *          the result is only ever written to the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    if ($parsed === false) {
      return false;
    }

    // A successful parse that produced nothing still counts as failure.
    return !empty($result);
  }
3822
3823
  /**
   * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
   *
   * Implemented by running an empty pattern with the "u" modifier against an empty string.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return (bool)$matched;
  }
3833
3834
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: all-decimal-digit values are taken as integers,
   * other all-hex-digit values go through UTF8::hex_to_int(), anything else through UTF8::ord().
   * An empty array is returned when a boundary is falsy or resolves to a falsy code point.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return string[]
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // Shared resolution of one boundary value to its code point.
    $toCodePoint = static function ($boundary): int {
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3880
3881
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal HTML entities so the loop below decodes them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // One pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8.
    // With $multi_decode the pass repeats until the string no longer changes.
    do {
      $previous = $str;

      $entityDecoded = self::html_entity_decode(self::to_utf8($str), $flags);
      $str = self::fix_simple_utf8(\rawurldecode($entityDecoded));

      $changed = $previous !== $str;
    } while ($multi_decode === true && $changed);

    return $str;
  }
3929
3930
  /**
   * Filters a list of strings and returns the survivors as a re-indexed list.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop entries that are empty after "trim()".</p>
   * @param int   $removeShortValues [optional] <p>Drop entries whose UTF8::strlen() is less than
   *                                 or equal to this value; null disables the check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      $tooShort = $removeShortValues !== null
                  &&
                  self::strlen($candidate) <= $removeShortValues;

      // The length check runs first; the emptiness check only for entries that passed it.
      if ($tooShort || ($removeEmptyValues === true && \trim($candidate) === '')) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
3964
3965
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The regex is assembled as: delimiter + pattern + delimiter + "u" + options.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used; "msr" is mapped to "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regexReplace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback for an empty (falsy) delimiter
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
3995
3996
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>Passed through to UTF8::remove_bom().</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4011
4012
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM table is tried in turn; whenever the string starts with one,
   * the corresponding number of bytes is cut off the front.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bom => $byteLength) {
      // 'CP850' is passed so position and length are handled byte-wise.
      if (self::strpos($str, $bom, 0, 'CP850') !== 0) {
        continue;
      }

      // A failed slice (false) becomes an empty string through the cast.
      $str = (string)self::substr($str, $byteLength, null, 'CP850');
    }

    return $str;
  }
4037
4038
  /**
   * Collapses consecutive repetitions of a string inside another string into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or strings) whose repeated runs are collapsed.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) === true ? [$what] : $what;

    // Anything that is neither a string nor an array leaves the input untouched.
    if (\is_array($needles) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $runOfNeedles = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = (string)\preg_replace($runOfNeedles, $needle, $str);
    }

    return $str;
  }
4061
4062
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the URL-encoded ("%XX") forms of these characters.</p>
   * @param string $replacement [optional] <p>What every removed sequence is replaced with.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits in percent-encoding may be upper- or lower-case, hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until stable: removing one sequence can join its neighbours into a new one.
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4095
4096
  /**
4097
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4098
   *
4099
   * @param string $str                <p>The input string</p>
4100
   * @param string $replacementChar    <p>The replacement character.</p>
4101
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4102
   *
4103
   * @return string
4104
   */
4105 37
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // mb_substitute_character() accepts only "none", "long", "entity" or an integer
      // code point, never a literal character. So invalid sequences are either dropped
      // ("none") or turned into U+FFFD, which the str_replace() below converts into the
      // requested replacement (this also works for multi-character replacements).
      $substitute = ($replacementChar === '') ? 'none' : 0xFFFD;

      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // always restore the global mbstring setting
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the diamond question mark)
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4145
4146
  /**
4147
   * Strip whitespace or other characters from end of a UTF-8 string.
4148
   *
4149
   * @param string $str   <p>The string to be trimmed.</p>
4150
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
4151
   *
4152
   * @return string <p>The string with unwanted characters stripped from the right.</p>
4153
   */
4154 7 View Code Duplication
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // INF (the default) or any other empty value selects the default character set.
    // The string '0' is falsy in PHP but is a perfectly valid character to strip,
    // so it must not fall into the default branch.
    $useDefaultChars = (
        $chars === INF
        ||
        ($chars !== '0' && !$chars)
    );

    if ($useDefaultChars) {
      // Unicode separators (\pZ) and "other" characters such as controls (\pC)
      $pattern = "[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "[$chars]+\$";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
4170
4171
  /**
4172
   * rxClass
4173
   *
4174
   * @param string $s
4175
   * @param string $class
4176
   *
4177
   * @return string
4178
   */
4179 32
  private static function rxClass(string $s, string $class = ''): string
  {
    // Results are memoised per ($s, $class) pair. The cache is nested instead of keyed
    // by "$s . $class", because the concatenation is ambiguous: ('a-z', '') and
    // ('', 'a-z') share the key 'a-z' although they produce different expressions.
    static $RX_CLASSS_CACHE = [];

    if (isset($RX_CLASSS_CACHE[$s][$class])) {
      return $RX_CLASSS_CACHE[$s][$class];
    }

    // $parts[0] collects everything that goes into one bracket expression,
    // every further entry becomes a separate alternative.
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // put a "-" at the very front of the bracket expression
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes long: quote it for use inside the pattern
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // a single character of three or more bytes: add as-is
        $parts[0] .= $char;
      } else {
        // more than one character: keep as its own alternative
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    if (1 === \count($parts)) {
      $return = $parts[0];
    } else {
      $return = '(?:' . \implode('|', $parts) . ')';
    }

    $RX_CLASSS_CACHE[$s][$class] = $return;

    return $return;
  }
4219
4220
  /**
4221
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4222
   */
4223 1
  public static function showSupport()
  {
    // make sure the support table has been filled
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // one "key - value" row per entry, wrapped into a <pre> block
    $rows = '';
    foreach (self::$SUPPORT as $name => $state) {
      $rows .= $name . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $rows . '</pre>';
  }
4235
4236
  /**
4237
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4238
   *
4239
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
4240
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4241
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
4242
   *
4243
   * @return string <p>The HTML numbered entity.</p>
4244
   */
4245 1
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    // nothing to encode
    if ($char === '') {
      return '';
    }

    // ASCII input is passed through untouched when requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // 'UTF-8' and 'CP850' are used as given, everything else is normalized first
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4265
4266
  /**
4267
   * @param string $str
4268
   * @param int    $tabLength
4269
   *
4270
   * @return string
4271
   */
4272
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    // a run of $tabLength spaces is what gets swapped for one tab character
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4276
4277
  /**
4278
   * Convert a string to an array of Unicode characters.
4279
   *
4280
   * @param string $str       <p>The string to split into array.</p>
4281
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
4282
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4283
   *
4284
   * @return string[] <p>An array containing chunks of the string.</p>
4285
   */
4286 35
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($str === '') {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // null = "not split yet"
    $ret = null;

    if (self::$SUPPORT['pcre_utf8'] === true) {
      // preg_match_all() fails (returns false) with the "u" modifier when the subject is
      // not valid UTF-8. In that case the byte-wise fallback below is used instead of
      // silently returning an empty array.
      if (
          \preg_match_all('/./us', $str, $retArray) !== false
          &&
          isset($retArray[0])
      ) {
        $ret = $retArray[0];
      }
      unset($retArray);
    }

    if ($ret === null) {

      // fallback: walk the bytes and collect every well-formed 1-4 byte sequence,
      // malformed or truncated sequences are skipped byte by byte

      $ret = [];
      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        $lead = \ord($str[$i]);

        // 0xxxxxxx: single byte character
        if ($lead < 0x80) {
          $ret[] = $str[$i];

          continue;
        }

        // number of continuation bytes announced by the lead byte
        if (($lead & 0xE0) === 0xC0) {
          $trailing = 1; // 110xxxxx
        } elseif (($lead & 0xF0) === 0xE0) {
          $trailing = 2; // 1110xxxx
        } elseif (($lead & 0xF8) === 0xF0) {
          $trailing = 3; // 11110xxx
        } else {
          continue; // not a lead byte
        }

        // the string ends before the sequence is complete
        if (!isset($str[$i + $trailing])) {
          continue;
        }

        $char = $str[$i];
        for ($j = 1; $j <= $trailing; $j++) {
          // every continuation byte must look like 10xxxxxx
          if ((\ord($str[$i + $j]) & 0xC0) !== 0x80) {
            continue 2;
          }

          $char .= $str[$i + $j];
        }

        $ret[] = $char;
        $i += $trailing;
      }
    }

    if ($length > 1) {
      // glue $length characters together per chunk
      return \array_map(
          static function (array $chunk): string {
            return \implode('', $chunk);
          },
          \array_chunk($ret, $length)
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
4394
4395
  /**
4396
   * Returns true if the string contains $needle, false otherwise. By default
4397
   * the comparison is case-sensitive, but can be made insensitive by setting
4398
   * $caseSensitive to false.
4399
   *
4400
   * @param string $haystack      <p>The input string.</p>
4401
   * @param string $needle        <p>Substring to look for.</p>
4402
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4403
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4404
   *
4405
   * @return bool <p>Whether or not $haystack contains $needle.</p>
4406
   */
4407
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // pick the case-sensitive or the case-insensitive search
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    // only a strict false means "not found"
    return $position !== false;
  }
4415
4416
  /**
4417
   * Returns true if the string contains all $needles, false otherwise. By
4418
   * default the comparison is case-sensitive, but can be made insensitive by
4419
   * setting $caseSensitive to false.
4420
   *
4421
   * @param string $haystack      <p>The input string.</p>
4422
   * @param array  $needles       <p>SubStrings to look for.</p>
4423
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4424
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4425
   *
4426
   * @return bool <p>Whether or not $haystack contains $needle.</p>
4427
   */
4428
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // an empty haystack or an empty needle list never counts as a match
    if ($haystack === '' || \count($needles) === 0) {
      return false;
    }

    // the first missing needle decides the result
    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if (!$found) {
        return false;
      }
    }

    return true;
  }
4446
4447
  /**
4448
   * Returns true if the string contains any $needles, false otherwise. By
4449
   * default the comparison is case-sensitive, but can be made insensitive by
4450
   * setting $caseSensitive to false.
4451
   *
4452
   * @param string $haystack      <p>The input string.</p>
4453
   * @param array  $needles       <p>SubStrings to look for.</p>
4454
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4455
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4456
   *
4457
   * @return bool <p>Whether or not $str contains $needle.</p>
4458
   */
4459
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // without needles there is nothing that could be contained
    if (\count($needles) === 0) {
      return false;
    }

    // the first needle that is found decides the result
    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if ($found) {
        return true;
      }
    }

    return false;
  }
4473
4474
  /**
4475
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4476
   *
4477
   * @param string $str <p>The input string.</p>
4478
   *
4479
   * @return false|string <p>
4480
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
4481
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
4482
   *                      </p>
4483
   */
4484 15
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // each check runs only once; 1 is mapped to little endian, 2 to big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the loaded data is not guaranteed to be a list, so only iterate over an array
    if (\is_array(self::$ENCODINGS) === true) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // The string survived a round trip through this encoding unchanged.
        // Comparing the strings directly (instead of their md5 hashes) also copes with
        // iconv() returning false, which md5() rejects under strict_types.
        if ($converted === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
4592
4593
  /**
4594
   * Check if the string ends with the given substring.
4595
   *
4596
   * @param string $haystack <p>The string to search in.</p>
4597
   * @param string $needle   <p>The substring to search for.</p>
4598
   *
4599
   * @return bool
4600
   */
4601 2
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never matches
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // compare the last strlen($needle) bytes of the haystack with the needle
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
4609
4610
  /**
4611
   * Returns true if the string ends with any of $substrings, false otherwise.
4612
   *
4613
   * - case-sensitive
4614
   *
4615
   * @param string   $str        <p>The input string.</p>
4616
   * @param string[] $substrings <p>Substrings to look for.</p>
4617
   *
4618
   * @return bool     <p>Whether or not $str ends with $substring.</p>
4619
   */
4620
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // without candidates there is nothing to match
    if (\count($substrings) === 0) {
      return false;
    }

    // the first matching suffix decides the result
    foreach ($substrings as $suffix) {
      $matches = self::str_ends_with($str, $suffix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4634
4635
  /**
4636
   * Ensures that the string begins with $substring. If it doesn't, it's
4637
   * prepended.
4638
   *
4639
   * @param string $str       <p>The input string.</p>
4640
   * @param string $substring <p>The substring to add if not present.</p>
4641
   *
4642
   * @return string
4643
   */
4644
  public static function str_ensure_left(string $str, string $substring): string
  {
    // already starts with the substring: nothing to do
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
4652
4653
  /**
4654
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
4655
   *
4656
   * @param string $str       <p>The input string.</p>
4657
   * @param string $substring <p>The substring to add if not present.</p>
4658
   *
4659
   * @return string
4660
   */
4661
  public static function str_ensure_right(string $str, string $substring): string
  {
    // already ends with the substring: nothing to do
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
4669
4670
  /**
4671
   * Check if the string ends with the given substring, case insensitive.
4672
   *
4673
   * @param string $haystack <p>The string to search in.</p>
4674
   * @param string $needle   <p>The substring to search for.</p>
4675
   *
4676
   * @return bool
4677
   */
4678 2
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never matches
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many bytes from the end of the haystack as the needle is long ...
    $tail = \substr($haystack, -\strlen($needle));

    // ... and compare them case-insensitively with the needle
    return self::strcasecmp($tail, $needle) === 0;
  }
4690
4691
  /**
4692
   * Returns true if the string ends with any of $substrings, false otherwise.
4693
   *
4694
   * - case-insensitive
4695
   *
4696
   * @param string   $str        <p>The input string.</p>
4697
   * @param string[] $substrings <p>Substrings to look for.</p>
4698
   *
4699
   * @return bool     <p>Whether or not $str ends with $substring.</p>
4700
   */
4701
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // without candidates there is nothing to match
    if (\count($substrings) === 0) {
      return false;
    }

    // the first matching suffix decides the result
    foreach ($substrings as $suffix) {
      $matches = self::str_iends_with($str, $suffix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4715
4716
  /**
4717
   * Inserts $substring into the string at the $index provided.
4718
   *
4719
   * @param string $str       <p>The input string.</p>
4720
   * @param string $substring <p>String to be inserted.</p>
4721
   * @param int    $index     <p>The index at which to insert the substring.</p>
4722
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
4723
   *
4724
   * @return string
4725
   */
4726
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $charCount = self::strlen($str, $encoding);

    // an index behind the end of the string leaves the input untouched
    if ($index > $charCount) {
      return $str;
    }

    // head (before $index) + inserted text + tail (from $index on)
    return self::substr($str, 0, $index, $encoding)
           . $substring
           . self::substr($str, $index, $charCount, $encoding);
  }
4739
4740
  /**
4741
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4742
   *
4743
   * @link  http://php.net/manual/en/function.str-ireplace.php
4744
   *
4745
   * @param mixed $search  <p>
4746
   *                       Every replacement with search array is
4747
   *                       performed on the result of previous replacement.
4748
   *                       </p>
4749
   * @param mixed $replace <p>
4750
   *                       </p>
4751
   * @param mixed $subject <p>
4752
   *                       If subject is an array, then the search and
4753
   *                       replace is performed with every entry of
4754
   *                       subject, and the return value is an array as
4755
   *                       well.
4756
   *                       </p>
4757
   * @param int   $count   [optional] <p>
4758
   *                       The number of matched and replaced needles will
4759
   *                       be returned in count which is passed by
4760
   *                       reference.
4761
   *                       </p>
4762
   *
4763
   * @return mixed <p>A string or an array of replacements.</p>
4764
   */
4765 26
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build one case-insensitive, UTF-8 aware pattern per search term.
    $patterns = [];
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ($term === '') {
        // an empty search term must never match: this pattern cannot match anything
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($term, '/') . '/ui';
      }
    }

    // The replacement must be inserted literally. preg_replace() would otherwise treat
    // "$n", "${n}" and "\n" inside it as back-references, so "\" and "$" are escaped.
    $escape = static function ($value): string {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $replaced = 0;
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4783
4784
  /**
4785
   * Check if the string starts with the given substring, case insensitive.
4786
   *
4787
   * @param string $haystack <p>The string to search in.</p>
4788
   * @param string $needle   <p>The substring to search for.</p>
4789
   *
4790
   * @return bool
4791
   */
4792 2 View Code Duplication
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never matches
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a case-insensitive hit at position 0 means "starts with"
    return self::stripos($haystack, $needle) === 0;
  }
4804
4805
  /**
4806
   * Returns true if the string begins with any of $substrings, false otherwise.
4807
   *
4808
   * - case-insensitive
4809
   *
4810
   * @param string $str        <p>The input string.</p>
4811
   * @param array  $substrings <p>Substrings to look for.</p>
4812
   *
4813
   * @return bool <p>Whether or not $str starts with $substring.</p>
4814
   */
4815 View Code Duplication
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    // an empty input or an empty candidate list never matches
    if ($str === '' || \count($substrings) === 0) {
      return false;
    }

    // the first matching prefix decides the result
    foreach ($substrings as $prefix) {
      $matches = self::str_istarts_with($str, $prefix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4833
4834
  /**
4835
   * Limit the number of characters in a string, but also after the next word.
4836
   *
4837
   * @param string $str
4838
   * @param int    $length
4839
   * @param string $strAddOn
4840
   *
4841
   * @return string
4842
   */
4843 1
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if ($str === '') {
      return '';
    }

    // short enough: return the input as it is
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the character right at the limit is a space: cut in front of it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // drop everything behind the last space of the truncated text
    $wholeWords = \implode(' ', \array_slice(\explode(' ', $truncated), 0, -1));

    if ($wholeWords !== '') {
      return $wholeWords . $strAddOn;
    }

    // nothing is left in front of a space: fall back to a hard cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4870
4871
  /**
4872
   * Pad a UTF-8 string to given length with another string.
4873
   *
4874
   * @param string $str        <p>The input string.</p>
4875
   * @param int    $pad_length <p>The length of return string.</p>
4876
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4877
   * @param int    $pad_type   [optional] <p>
4878
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4879
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4880
   *                           </p>
4881
   *
4882
   * @encoding
4883
   *
4884
   * @return string <strong>Returns the padded string</strong>
4885
   */
4886 2
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = self::strlen($str);

    if (
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // An empty pad string cannot fill anything and would lead to a
      // division by zero below, so the input is returned unchanged.
      if (!$ps_length) {
        return $str;
      }

      // number of characters that have to be added
      $diff = ($pad_length - $str_length);

      switch ($pad_type) {
        case STR_PAD_LEFT:
          // repeat the pad string often enough, then cut it to the exact size
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the smaller half, the right side the bigger half
          $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2));
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)\ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4925
4926
  /**
4927
   * Repeat a string.
4928
   *
4929
   * @param string $str        <p>
4930
   *                           The string to be repeated.
4931
   *                           </p>
4932
   * @param int    $multiplier <p>
4933
   *                           Number of time the input string should be
4934
   *                           repeated.
4935
   *                           </p>
4936
   *                           <p>
4937
   *                           multiplier has to be greater than or equal to 0.
4938
   *                           If the multiplier is set to 0, the function
4939
   *                           will return an empty string.
4940
   *                           </p>
4941
   *
4942
   * @return string <p>The repeated string.</p>
4943
   */
4944 1
  public static function str_repeat(string $str, int $multiplier): string
  {
    // the input is passed through self::filter() before it is repeated
    return \str_repeat(self::filter($str), $multiplier);
  }
4950
4951
  /**
4952
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
4953
   *
4954
   * Replace all occurrences of the search string with the replacement string
4955
   *
4956
   * @link http://php.net/manual/en/function.str-replace.php
4957
   *
4958
   * @param mixed $search  <p>
4959
   *                       The value being searched for, otherwise known as the needle.
4960
   *                       An array may be used to designate multiple needles.
4961
   *                       </p>
4962
   * @param mixed $replace <p>
4963
   *                       The replacement value that replaces found search
4964
   *                       values. An array may be used to designate multiple replacements.
4965
   *                       </p>
4966
   * @param mixed $subject <p>
4967
   *                       The string or array being searched and replaced on,
4968
   *                       otherwise known as the haystack.
4969
   *                       </p>
4970
   *                       <p>
4971
   *                       If subject is an array, then the search and
4972
   *                       replace is performed with every entry of
4973
   *                       subject, and the return value is an array as
4974
   *                       well.
4975
   *                       </p>
4976
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4977
   *
4978
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4979
   */
4980 12
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // plain delegation to the native function, $count is filled by reference
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4984
4985
  /**
4986
   * Replace the first "$search"-term with the "$replace"-term.
4987
   *
4988
   * @param string $search
4989
   * @param string $replace
4990
   * @param string $subject
4991
   *
4992
   * @return string
4993
   */
4994 1
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    // Position of the first match, or false when $search does not occur.
    $firstMatch = self::strpos($subject, $search);

    if ($firstMatch === false) {
      // Nothing to replace: hand back the subject untouched.
      return $subject;
    }

    // Replace exactly the matched span (its length is the length of $search).
    return self::substr_replace(
        $subject,
        $replace,
        $firstMatch,
        self::strlen($search)
    );
  }
5004
5005
  /**
5006
   * Shuffles all the characters in the string.
5007
   *
5008
   * PS: uses random algorithm which is weak for cryptography purposes
5009
   *
5010
   * @param string $str <p>The input string</p>
5011
   *
5012
   * @return string <p>The shuffled string.</p>
5013
   */
5014 1
  public static function str_shuffle(string $str): string
  {
    // Guard the empty string explicitly: \range(0, -1) does not produce an
    // empty list but [0, -1], which would make the loop below run twice on
    // a string that has no characters at all.
    if ($str === '') {
      return '';
    }

    $length = self::strlen($str);
    if ($length < 1) {
      // No countable characters -> nothing to shuffle.
      return '';
    }

    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // Rebuild the string by picking one character per shuffled index.
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
5027
5028
  /**
5029
   * Sort all characters according to code points.
5030
   *
5031
   * @param string $str    <p>A UTF-8 string.</p>
5032
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
5033
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
5034
   *
5035
   * @return string <p>String of sorted characters.</p>
5036
   */
5037 1
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice drops repeated values, because array keys are unique.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    // Sort by value (keys are kept), ascending unless $desc is requested.
    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    // Turn the ordered code points back into a string.
    return self::string($codePoints);
  }
5053
5054
  /**
5055
   * Split a string into an array.
5056
   *
5057
   * @param string|string[] $str
5058
   * @param int             $len
5059
   *
5060
   * @return string[]
5061
   */
5062 18
  public static function str_split($str, int $len = 1): array
  {
    // Arrays are split element by element; the keys are kept.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    // Empty input -> empty result.
    if (!isset($str[0])) {
      return [];
    }

    // A chunk length below 1 is handed to the native function unchanged.
    if ($len < 1) {
      return \str_split($str, $len);
    }

    // Collect every match of the grapheme-cluster pattern.
    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue every run of $len consecutive clusters into one output element.
    $chunks = [];
    $current = -1;
    foreach ($clusters as $index => $cluster) {
      if ($index % $len) {
        $chunks[$current] .= $cluster;
        continue;
      }

      // Index is a multiple of $len: start the next chunk.
      $chunks[++$current] = $cluster;
    }

    return $chunks;
  }
5102
5103
  /**
5104
   * Check if the string starts with the given substring.
5105
   *
5106
   * @param string $haystack <p>The string to search in.</p>
5107
   * @param string $needle   <p>The substring to search for.</p>
5108
   *
5109
   * @return bool
5110
   */
5111 2 View Code Duplication
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // An empty haystack or an empty needle is never reported as a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The needle has to be found at the very first position.
    return \strpos($haystack, $needle) === 0;
  }
5123
5124
  /**
5125
   * Returns true if the string begins with any of $substrings, false otherwise.
5126
   *
5127
   * - case-sensitive
5128
   *
5129
   * @param string $str        <p>The input string.</p>
5130
   * @param array  $substrings <p>Substrings to look for.</p>
5131
   *
5132
   * @return bool <p>Whether or not $str starts with $substring.</p>
5133
   */
5134 View Code Duplication
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing can match an empty string, and an empty list offers no candidates.
    if (!isset($str[0]) || empty($substrings)) {
      return false;
    }

    // The first candidate that is a prefix of $str decides the result.
    foreach ($substrings as $prefix) {
      if (self::str_starts_with($str, $prefix)) {
        return true;
      }
    }

    return false;
  }
5152
5153
  /**
5154
   * Get a binary representation of a specific string.
5155
   *
5156
   * @param string $str <p>The input string.</p>
5157
   *
5158
   * @return string
5159
   */
5160 1
  public static function str_to_binary(string $str): string
  {
    // Hex dump of the raw bytes (two hex digits per byte).
    $hex = \bin2hex($str);
    if ($hex === '') {
      // Same result the base_convert() based implementation gave for ''.
      return '0';
    }

    // Convert digit by digit: \base_convert() goes through int/float internally
    // and silently loses precision once the input is longer than a few bytes.
    $bits = '';
    foreach (\str_split($hex) as $nibble) {
      $bits .= \str_pad(\decbin((int)\hexdec($nibble)), 4, '0', \STR_PAD_LEFT);
    }

    // Keep the established output format: no leading zeros, "0" for a zero value.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
5166
5167
  /**
5168
   * @param string   $str
5169
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
5170
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
5171
   *
5172
   * @return string[]
5173
   */
5174
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Empty input: either no lines at all or a single empty line.
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // One or two consecutive CR / LF characters act as a single separator.
    $lines = \preg_split("/[\r\n]{1,2}/u", $str);

    // Without any filter option the raw split result is returned.
    $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$filterRequested) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
5202
5203
  /**
5204
   * Convert a string into an array of words.
5205
   *
5206
   * @param string   $str
5207
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
5208
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
5209
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
5210
   *
5211
   * @return string[]
5212
   */
5213 10
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Empty input: either no entries at all or a single empty entry.
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // Character class for "word" characters: letters plus the caller's extras.
    $charList = self::rxClass($charList, '\pL');

    // The words are the captured delimiters, so the result holds both the
    // words and the text between them.
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // Without any filter option the raw split result is returned.
    $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$filterRequested) {
      return $parts;
    }

    return self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);
  }
5243
5244
  /**
5245
   * alias for "UTF8::to_ascii()"
5246
   *
5247
   * @see UTF8::to_ascii()
5248
   *
5249
   * @param string $str
5250
   * @param string $unknown
5251
   * @param bool   $strict
5252
   *
5253
   * @return string
5254
   */
5255 7
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Alias: all arguments are forwarded unchanged to to_ascii().
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5259
5260
  /**
5261
   * Counts number of words in the UTF-8 string.
5262
   *
5263
   * @param string $str      <p>The input string.</p>
5264
   * @param int    $format   [optional] <p>
5265
   *                         <strong>0</strong> => return a number of words (default)<br>
5266
   *                         <strong>1</strong> => return an array of words<br>
5267
   *                         <strong>2</strong> => return an array of words with word-offset as key
5268
   *                         </p>
5269
   * @param string $charlist [optional] <p>Additional chars that count as part of a word and do not start a new word.</p>
5270
   *
5271
   * @return string[]|int <p>The number of words in the string</p>
5272
   */
5273 1
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // The words are read from the odd indexes of the split result; the even
    // indexes hold the text in between.
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    // Format 1: plain list of words.
    if ($format === 1) {
      $words = [];
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // Format 2: words keyed by their offset within the string.
    if ($format === 2) {
      $words = [];
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $word = $parts[$i];
        $words[$offset] = $word;
        // Skip the word itself and the text that follows it.
        $offset += self::strlen($word) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // Any other format: just the number of words.
    return ($partCount - 1) / 2;
  }
5303
5304
  /**
5305
   * Case-insensitive string comparison.
5306
   *
5307
   * INFO: Case-insensitive version of UTF8::strcmp()
5308
   *
5309
   * @param string $str1
5310
   * @param string $str2
5311
   *
5312
   * @return int <p>
5313
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
5314
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
5315
   *             <strong>0</strong> if they are equal.
5316
   *             </p>
5317
   */
5318 11
  public static function strcasecmp(string $str1, string $str2): int
  {
    // Case-fold both sides first, then compare case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5322
5323
  /**
5324
   * alias for "UTF8::strstr()"
5325
   *
5326
   * @see UTF8::strstr()
5327
   *
5328
   * @param string $haystack
5329
   * @param string $needle
5330
   * @param bool   $before_needle
5331
   * @param string $encoding
5332
   * @param bool   $cleanUtf8
5333
   *
5334
   * @return string|false
5335
   */
5336 1
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Alias: all arguments are forwarded unchanged to strstr().
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5340
5341
  /**
5342
   * Case-sensitive string comparison.
5343
   *
5344
   * @param string $str1
5345
   * @param string $str2
5346
   *
5347
   * @return int  <p>
5348
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
5349
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
5350
   *              <strong>0</strong> if they are equal.
5351
   *              </p>
5352
   */
5353 14
  public static function strcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no normalization needed.
    if ($str1 === $str2) {
      return 0;
    }

    // Compare the canonically decomposed (NFD) forms.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() does not return a string when it fails (e.g. on invalid
    // UTF-8). Under strict_types that value would make \strcmp() throw a
    // TypeError, so fall back to the raw input for the affected side.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
5361
5362
  /**
5363
   * Find length of initial segment not matching mask.
5364
   *
5365
   * @param string $str
5366
   * @param string $charList
5367
   * @param int    $offset
5368
   * @param int    $length
5369
   *
5370
   * @return int|null
5371
   */
5372 15
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Restrict the search to the requested slice of the string.
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }

      $str = (string)$slice;
    }

    if (!isset($str[0])) {
      return null;
    }

    // Lazily capture everything in front of the first mask character.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $match)) {
      return self::strlen($match[1]);
    }

    // No mask character found: the whole string is the initial segment.
    return self::strlen($str);
  }
5396
5397
  /**
5398
   * alias for "UTF8::stristr()"
5399
   *
5400
   * @see UTF8::stristr()
5401
   *
5402
   * @param string $haystack
5403
   * @param string $needle
5404
   * @param bool   $before_needle
5405
   * @param string $encoding
5406
   * @param bool   $cleanUtf8
5407
   *
5408
   * @return string|false
5409
   */
5410 1
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Alias: all arguments are forwarded unchanged to stristr().
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5414
5415
  /**
5416
   * Create a UTF-8 string from code points.
5417
   *
5418
   * INFO: opposite to UTF8::codepoints()
5419
   *
5420
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
5421
   *
5422
   * @return string <p>UTF-8 encoded string.</p>
5423
   */
5424 2
  public static function string(array $array): string
  {
    // Map every code point to its character via UTF8::chr() ...
    $chars = \array_map([self::class, 'chr'], $array);

    // ... and join the characters without a separator.
    return \implode('', $chars);
  }
5437
5438
  /**
5439
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5440
   *
5441
   * @param string $str <p>The input string.</p>
5442
   *
5443
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
5444
   */
5445 3
  public static function string_has_bom(string $str): bool
  {
    // Only the keys of the BOM table (the BOM byte sequences) matter here;
    // the stored byte lengths are not needed for this check.
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5455
5456
  /**
5457
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
5458
   *
5459
   * @link http://php.net/manual/en/function.strip-tags.php
5460
   *
5461
   * @param string $str             <p>
5462
   *                                The input string.
5463
   *                                </p>
5464
   * @param string $allowable_tags  [optional] <p>
5465
   *                                You can use the optional second parameter to specify tags which should
5466
   *                                not be stripped.
5467
   *                                </p>
5468
   *                                <p>
5469
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
5470
   *                                can not be changed with allowable_tags.
5471
   *                                </p>
5472
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
5473
   *
5474
   * @return string <p>The stripped string.</p>
5475
   */
5476 2
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Optionally clean the input before the tags are stripped.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
5488
5489
  /**
5490
   * Strip all whitespace characters. This includes tabs and newline
5491
   * characters, as well as multibyte whitespace such as the thin space
5492
   * and ideographic space.
5493
   *
5494
   * @param string $str
5495
   *
5496
   * @return string
5497
   */
5498 12
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Remove every run of characters matching the [[:space:]] class (unicode mode).
    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
5506
5507
  /**
5508
   * Finds position of first occurrence of a string within another, case insensitive.
5509
   *
5510
   * @link http://php.net/manual/en/function.mb-stripos.php
5511
   *
5512
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5513
   * @param string $needle    <p>The string to find in haystack.</p>
5514
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
5515
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5516
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5517
   *
5518
   * @return int|false <p>
5519
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
5520
   *                   or false if needle is not found.
5521
   *                   </p>
5522
   */
5523 10
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // The two most common encoding names skip the normalization step.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the feature flags in self::$SUPPORT are populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5555
5556
  /**
5557
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5558
   *
5559
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
5560
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
5561
   * @param bool   $before_needle  [optional] <p>
5562
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
5563
   *                               haystack before the first occurrence of the needle (excluding the needle).
5564
   *                               </p>
5565
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
5566
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5567
   *
5568
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
5569
   */
5570 17
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have left nothing to search for. Compare against ''
    // explicitly: a loose "!$needle" test also treats the perfectly valid
    // needle "0" as empty and would return the whole haystack for it.
    if ($needle === '') {
      return $haystack;
    }

    // Make sure the feature flags in self::$SUPPORT are populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Preferred implementation: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Pure ASCII on both sides: the native byte function is sufficient.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // Return everything from the needle on.
    return self::substr($haystack, self::strlen($match[1]));
  }
5631
5632
  /**
5633
   * Get the string length, not the byte-length!
5634
   *
5635
   * @link     http://php.net/manual/en/function.mb-strlen.php
5636
   *
5637
   * @param string $str       <p>The string being checked for length.</p>
5638
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5639
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5640
   *
5641
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
5642
   *             character counted as +1)</p>
5643
   */
5644 88
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    // The two most common encoding names skip the normalization step.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Make sure the feature flags in self::$SUPPORT are populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // ASCII / CP850 are counted as 8-bit strings.
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if (
          $encoding === 'CP850'
          &&
          self::$SUPPORT['mbstring_func_overload'] === false
      ) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str.
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      // Neither extension is available for a non-UTF-8 encoding: warn the caller.
      if (
          self::$SUPPORT['mbstring'] === false
          &&
          self::$SUPPORT['iconv'] === false
      ) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      // Only iconv is available for a non-UTF-8 encoding.
      if (
          self::$SUPPORT['iconv'] === true
          &&
          self::$SUPPORT['mbstring'] === false
      ) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    // Regular order of preference: mbstring, then iconv, then intl.
    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strlen($str);
    }

    // Pure ASCII: the byte count is the character count.
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any character" in unicode mode
    \preg_match_all('/./us', $str, $parts);
    $length = \count($parts[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5737
5738
  /**
5739
   * Get string length in byte.
5740
   *
5741
   * @param string $str
5742
   *
5743
   * @return int
5744
   */
5745 72
  public static function strlen_in_byte(string $str): int
  {
    // When the overload flag is set, the length is taken via mb_strlen() with
    // the 8-bit "CP850" encoding instead of the plain strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
5755
5756
  /**
5757
   * Case insensitive string comparisons using a "natural order" algorithm.
5758
   *
5759
   * INFO: natural order version of UTF8::strcasecmp()
5760
   *
5761
   * @param string $str1 <p>The first string.</p>
5762
   * @param string $str2 <p>The second string.</p>
5763
   *
5764
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
5765
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
5766
   *             <strong>0</strong> if they are equal
5767
   */
5768 1
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // Case-fold both sides first, then compare in natural order.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5772
5773
  /**
5774
   * String comparisons using a "natural order" algorithm
5775
   *
5776
   * INFO: natural order version of UTF8::strcmp()
5777
   *
5778
   * @link  http://php.net/manual/en/function.strnatcmp.php
5779
   *
5780
   * @param string $str1 <p>The first string.</p>
5781
   * @param string $str2 <p>The second string.</p>
5782
   *
5783
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
5784
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
5785
   *             <strong>0</strong> if they are equal
5786
   */
5787 2
  public static function strnatcmp(string $str1, string $str2): int
  {
    // Identical strings are equal; no folding needed.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Fold both sides via strtonatfold(), then let the native natural-order
    // comparison decide.
    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5791
5792
  /**
5793
   * Case-insensitive string comparison of the first n characters.
5794
   *
5795
   * @link  http://php.net/manual/en/function.strncasecmp.php
5796
   *
5797
   * @param string $str1 <p>The first string.</p>
5798
   * @param string $str2 <p>The second string.</p>
5799
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5800
   *
5801
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
5802
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
5803
   *             <strong>0</strong> if they are equal
5804
   */
5805 1
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // Case-fold both sides first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5809
5810
  /**
5811
   * String comparison of the first n characters.
5812
   *
5813
   * @link  http://php.net/manual/en/function.strncmp.php
5814
   *
5815
   * @param string $str1 <p>The first string.</p>
5816
   * @param string $str2 <p>The second string.</p>
5817
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5818
   *
5819
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
5820
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
5821
   *             <strong>0</strong> if they are equal
5822
   */
5823 2
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // Cut both strings down to their first $len characters; the cast turns a
    // failed substr() (false) into an empty string.
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5830
5831
  /**
5832
   * Search a string for any of a set of characters.
5833
   *
5834
   * @link  http://php.net/manual/en/function.strpbrk.php
5835
   *
5836
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5837
   * @param string $char_list <p>This parameter is case sensitive.</p>
5838
   *
5839
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
5840
   */
5841 1
  public static function strpbrk(string $haystack, string $char_list)
  {
    // Nothing can be found in an empty haystack or with an empty char list.
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // Find the first character of the haystack that is part of the list.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // Return the haystack from the first occurrence of that character on.
    $position = \strpos($haystack, $found[0]);

    return \substr($haystack, $position);
  }
5853
5854
  /**
   * Find position of first occurrence of string in a string.
   *
   * The lookup is delegated to the first usable backend, in this order:
   * byte-wise "\strpos" (only for "CP850" without mbstring function overloading),
   * "\mb_strpos", "\grapheme_strpos" (UTF-8 only), "\iconv_strpos" (non-negative offset only),
   * byte-wise "\strpos" for pure ASCII input and finally a vanilla-PHP fallback.
   * A miss reported by "\mb_strpos" or "\iconv_strpos" does not end the search, the next backend is tried.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: $needle is declared as "string", so an "(int)$needle === $needle"
    // code-point conversion can never match here and is intentionally not attempted.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    // This single iconv attempt also covers the "non UTF-8 encoding without mbstring" case:
    // with mbstring missing and a non UTF-8 encoding, both blocks above are skipped anyway.
    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // A negative offset counts from the end of the string: translate it into the
      // equivalent absolute character offset (clamped to the start), so that the
      // returned position stays relative to the beginning of $haystack.
      $offset = \max(0, (int)self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      // the offset lies behind the end of the string
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position of the needle inside the remaining part of the haystack
    $pos = \strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // skipped characters + characters in front of the needle
    return $offset + (int)self::strlen(\substr($haystackTmp, 0, $pos));
  }
5993
5994
  /**
   * Return the part of a string around the last occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string that is searched for the last occurrence of needle.</p>
   * @param string $needle        <p>The string to look for in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return everything from the beginning of haystack
   *                              up to the last occurrence of needle.<br>
   *                              <b>false</b>: return everything from the last occurrence of needle
   *                              to the end of haystack.
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "UTF-8" and "CP850" are passed through untouched, any other name is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // strip invalid byte sequences from both inputs before they are searched
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "\mb_strrchr" is used directly (native or via polyfill)
    return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
  }
6030
6031
  /**
   * Reverse the order of the characters of a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input with its characters in reverse sequence.</p>
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // walk from the last character down to the first one and collect them in that order
    $chars = [];
    for ($pos = (int)self::strlen($str) - 1; $pos >= 0; --$pos) {
      $chars[] = self::substr($str, $pos, 1);
    }

    return \implode('', $chars);
  }
6052
6053
  /**
   * Return the part of a string around the last occurrence of a needle, ignoring case.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string that is searched for the last occurrence of needle.</p>
   * @param string $needle         <p>The string to look for in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return everything from the beginning of haystack
   *                               up to the last occurrence of needle.<br>
   *                               <b>false</b>: return everything from the last occurrence of needle
   *                               to the end of haystack.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "UTF-8" and "CP850" are passed through untouched, any other name is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // strip invalid byte sequences from both inputs before they are searched
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
  }
6088
6089
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Uses "\mb_strripos" when mbstring is available, otherwise "\grapheme_strripos"
   * (UTF-8 + intl only) and as last resort an upper-cased self::strrpos() lookup.
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so an "(int)$needle === $needle"
    // code-point conversion can never match here and is intentionally not attempted.

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased variants case-sensitively
    // NOTE(review): upper-casing may change the number of characters for some
    // code points, which would shift the reported position - confirm if relevant.

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
6151
6152
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Uses "\mb_strrpos" when mbstring is available, otherwise "\grapheme_strrpos"
   * (UTF-8 + intl only) and as last resort a byte-wise "\strrpos" whose result is
   * converted back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point
    if (\is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The native functions below declare an integer offset and reject "null"
    // when strict types are active, so the "not given" default is mapped to 0.
    $offset = (int)$offset;

    // INFO: $encoding is declared as "string", so the old "$encoding === true"
    // fallback for the former bool-parameter can never match and is not checked anymore.
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters, they are added to the result again below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // ignore the last characters, the result stays relative to the start of the string
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match, converted into a character position afterwards
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
6242
6243
  /**
   * Length of the leading part of a string that consists only of characters from a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start position of the part of the string that is examined.</p>
   * @param int    $length [optional] <p>Length of the part of the string that is examined.</p>
   *
   * @return int
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // restrict the input to the requested window first
    if ($offset !== 0 || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the longest run of mask characters anchored at the start of the string
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!\preg_match($pattern, $str, $run)) {
      return 0;
    }

    return self::strlen($run[0]);
  }
6270
6271
  /**
   * Return the part of haystack that starts at the first occurrence of needle,
   * or the part in front of it.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // $cleanUtf8 is not type-hinted, therefore only a real boolean "true" enables the cleanup
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings than UTF-8
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // vanilla fallback: lazily capture everything in front of the first needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
6338
6339
  /**
   * Unicode transformation for case-less matching.
   *
   * The common case-fold table is applied first, then (optionally) the full
   * case-folding data, and the result is finally passed to self::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // search / replace lists of the common table are built once and kept for later calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {

      // the full folding data is loaded on first use only
      static $fullFoldTable = null;
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $folded = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
6385
6386
  /**
   * Make a string lowercase.
   *
   * Without a language the work is done by "\mb_strtolower"; with a language the
   * intl transliterator "<lang>-Lower" is used, if intl is available.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8) {
      // remove invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // no special language requested: plain multi-byte lowercasing
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      // language specific rules need intl: warn and use the generic conversion instead
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      // unknown transliterator id: warn and use the language independent one
      \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
6439
6440
  /**
   * Generic case sensitive transformation for collation matching:
   * the string is decomposed (NFD) and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // strip the combining marks that the decomposition has separated from their base characters
    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6452
6453
  /**
   * Make a string uppercase.
   *
   * Without a language the work is done by "\mb_strtoupper"; with a language the
   * intl transliterator "<lang>-Upper" is used, if intl is available.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // unknown transliterator id: warn and use the language independent one
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // language specific rules need intl: warn (naming this method) and use the generic conversion below
      \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6505
6506
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                A copy of str in which every occurrence of each character of "from" is replaced by the
   *                character at the same position in "to". Surplus characters of the longer argument are ignored.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // identical "from" and "to": nothing would change
    if ($from === $to) {
      return $str;
    }

    // INF marks "no $to given"; otherwise build a char => char map from both arguments
    if ($to !== INF) {
      $searchChars = self::str_split($from);
      $replaceChars = self::str_split($to);

      // only as many pairs as the shorter list provides
      $pairs = \min(\count($searchChars), \count($replaceChars));

      $from = \array_combine(
          \array_slice($searchChars, 0, $pairs),
          \array_slice($replaceChars, 0, $pairs)
      );
    }

    // a plain string without replacement is removed from $str, a map is handed to "\strtr"
    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
6551
6552
  /**
   * Return the width of a string, as reported by "\mb_strwidth".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // "UTF-8" and "CP850" are passed through untouched, any other name is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // "\mb_strwidth" is used directly (native or via polyfill)
    return \mb_strwidth($str, $encoding);
  }
6576
6577
  /**
6578
   * Get part of a string.
6579
   *
6580
   * @link http://php.net/manual/en/function.mb-substr.php
6581
   *
6582
   * @param string $str       <p>The string being checked.</p>
6583
   * @param int    $offset    <p>The first position used in str.</p>
6584
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
6585
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6586
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6587
   *
6588
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6589
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6590
   *                      characters long, <b>FALSE</b> will be returned.</p>
6591
   */
6592 75
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Empty input: nothing to extract.
    if (!isset($str[0])) {
      return '';
    }

    // An explicit length of zero always yields an empty string.
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // The character count is only needed to validate a non-zero offset
    // and as the default value for a missing length.
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: the offset points exactly behind the last character
    // and no length was requested ("$length" can not be 0 here, see above).
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible: the offset lies behind the end of the string
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut byte-wise with the native function,
    // as long as "substr()" is not overloaded by mbstring
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the requested encoding explicitly, instead of relying on the
      // "iconv.internal_encoding" / "default_charset" ini-settings
      $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // pure ASCII: byte offsets and character offsets are identical
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
6696
6697
  /**
   * Compare a part of "$str1" with "$str2".
   *
   * If an offset or a length is given, "$str1" is first reduced to the requested part via
   * "UTF8::substr()" and "$str2" is cut down to the character length of that part. The two
   * resulting strings are then handed to "UTF8::strcmp()" or "UTF8::strcasecmp()".
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison, passed on to
   *                                     "UTF8::substr()".</p>
   * @param int|null $length             [optional] <p>The length of the comparison, passed on to
   *                                     "UTF8::substr()".</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $mustCut = ($offset !== 0 || $length !== null);

    if ($mustCut) {
      // a failed cut ("false") is treated like an empty string
      $left = self::substr($str1, $offset, $length);
      $str1 = ($left === false) ? '' : (string)$left;

      // compare only as many characters as the (cut) main string has
      $right = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($right === false) ? '' : (string)$right;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6741
6742
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If only an offset is given, the character length of the
   *                           haystack (in "$encoding") is used.
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty
   *                   (and, before PHP 7.1, if a non-zero offset plus a non-zero length is not
   *                   greater than zero).</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure with the same encoding that "self::substr()" below cuts with,
        // otherwise the haystack could be truncated for non UTF-8 input
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window, a failed cut counts as empty
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the matches of the quoted needle with a unicode-aware regex
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
6824
6825
  /**
   * Count how often "$substring" occurs in "$str".
   *
   * The comparison is case-sensitive by default. For a case-insensitive count both
   * strings are upper-cased via "UTF8::strtoupper()" before they are handed to
   * "UTF8::substr_count()".
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>The result of "UTF8::substr_count()" casted to int (so "false" becomes 0).</p>
   */
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($caseSensitive === false) {
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
6846
6847
  /**
   * Strip the prefix "$needle" from the beginning of "$haystack", ignoring the case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix; the unchanged haystack if the needle is empty
   *                or the haystack does not start with it; an empty string for an empty haystack.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix, a failed cut ("false") becomes an empty string
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6875
6876
  /**
   * Strip the suffix "$needle" from the end of "$haystack", ignoring the case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix; the unchanged haystack if the needle is empty
   *                or the haystack does not end with it; an empty string for an empty haystack.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix, a failed cut ("false") becomes an empty string
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6904
6905
  /**
   * Strip the prefix "$needle" from the beginning of "$haystack" (case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix; the unchanged haystack if the needle is empty
   *                or the haystack does not start with it; an empty string for an empty haystack.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix, a failed cut ("false") becomes an empty string
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6933
6934
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If "$str" is an array, the replacement, offset and length are expanded to one value per
   * input string and the method is applied to every element.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. For a single string and no given length, the character
   *                                          length of the string is used; for an array of strings and no given
   *                                          length, a length of zero (pure insertion) is used for every element.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $total = \count($str);

      // one replacement per input string
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $total)
          : \array_pad([$replacement], $total, $replacement);

      // one offset per input string, everything that is not an integer becomes 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $total)
        );
      } else {
        $offset = \array_pad([$offset], $total, $offset);
      }

      // one length per input string
      if ($length === null) {
        $length = \array_fill(0, $total, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($total) {
              if ($value === null) {
                return 0;
              }

              // non-integer values fall back to the number of input strings
              return (int)$value === $value ? $value : $total;
            },
            \array_slice($length, 0, $total)
        );
      } else {
        $length = \array_pad([$length], $total, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string only uses the first replacement of an array
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // pure ASCII: the native function is sufficient
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters ...
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    // ... and splice the replacement characters into the input characters
    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
7035
7036
  /**
   * Strip the suffix "$needle" from the end of "$haystack" (case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix; the unchanged haystack if the needle is empty
   *                or the haystack does not end with it; an empty string for an empty haystack.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix, a failed cut ("false") becomes an empty string
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
7064
7065
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences, before the string is case-converted
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // Byte-wise XOR of the three strings: wherever the lower- and the upper-cased
    // byte differ, the result is the one that is NOT in the input; identical bytes
    // stay as they are.
    // NOTE(review): PHP cuts the result of a string XOR to the shortest operand, so this
    // presumably relies on the case conversions keeping the byte length - TODO confirm.
    return (string)($lower ^ $upper ^ $str);
  }
7092
7093
  /**
   * Replace every tab character with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string <p>The string with all tabs replaced.</p>
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $indent = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $indent, $str);
  }
7103
7104
  /**
   * Title-case a string: the first character of each word becomes uppercase,
   * all other characters become lowercase.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>String with all characters of $str being title-cased.</p>
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "mb_convert_case()" used a polyfill from the "UTF8"-Class
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
7118
7119
  /**
   * Deprecated alias: forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Passed on as the "unknown" character of "UTF8::to_ascii()".</p>
   * @param bool   $strict    [optional] <p>Passed on as the "strict" flag of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
7136
7137
  /**
   * Deprecated alias: forwards the argument to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7152
7153
  /**
   * Deprecated alias: forwards the argument to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
7168
7169
  /**
   * Deprecated alias: forwards the argument to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7184
7185
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned unchanged. Everything else is cleaned via "UTF8::clean()",
   * optionally transliterated via PHP-Intl and finally mapped character by character with
   * the lookup tables that are loaded on demand via "self::getData()". Characters without
   * a table entry are replaced by "$unknown".
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // function-level cache of the lookup tables, keyed by the "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        $strTmp = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the cleaned
        // string in that case and continue with the lookup tables below, instead of
        // passing "false" to string-only functions
        if ($strTmp !== false) {
          $str = $strTmp;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    // table that is indexed with a single byte and compared against numeric byte values below
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // The value is reset for every character, so that a character without a
      // decodable lead byte can never reuse the code point of a previous one.
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64
               + (self::$ORD[$c[1]] - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3 bytes
        $ord = ($ordC0 - 224) * 4096
               + (self::$ORD[$c[1]] - 128) * 64
               + (self::$ORD[$c[2]] - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4 bytes
        $ord = ($ordC0 - 240) * 262144
               + (self::$ORD[$c[1]] - 128) * 4096
               + (self::$ORD[$c[2]] - 128) * 64
               + (self::$ORD[$c[3]] - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5 bytes
        $ord = ($ordC0 - 248) * 16777216
               + (self::$ORD[$c[1]] - 128) * 262144
               + (self::$ORD[$c[2]] - 128) * 4096
               + (self::$ORD[$c[3]] - 128) * 64
               + (self::$ORD[$c[4]] - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        // 6 bytes
        $ord = ($ordC0 - 252) * 1073741824
               + (self::$ORD[$c[1]] - 128) * 16777216
               + (self::$ORD[$c[2]] - 128) * 262144
               + (self::$ORD[$c[3]] - 128) * 4096
               + (self::$ORD[$c[4]] - 128) * 64
               + (self::$ORD[$c[5]] - 128);
      }

      // no code point: lead byte 254 / 255 or a stray continuation byte (128 - 191)
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing bank as empty, so it is requested only once
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the code point within its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return \implode('', $chars);
  }
7344
7345
  /**
   * Interpret a string as a boolean value.
   *
   * The lower-cased string is first looked up in a fixed list of keywords
   * ("true", "1", "on", "yes" / "false", "0", "off", "no"). Any other numeric
   * string is true if it is greater than zero. Everything else is true if the
   * string is truthy after "UTF8::trim()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function to_boolean(string $str): bool
  {
    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keywords = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    $known = $keywords[\strtolower($str)] ?? null;
    if ($known !== null) {
      return $known;
    }

    if (\is_numeric($str)) {
      return $str + 0 > 0;
    }

    return (bool)self::trim($str);
  }
7376
7377
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively, keys are kept); every other
   * value is casted to a string and handed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[] <p>The converted string / array, an empty string for empty input.</p>
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      $converted = [];
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return ($str === '') ? '' : self::utf8_decode($str);
  }
7401
7402
  /**
   * Alias: forwards the argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7415
7416
  /**
   * Leaves byte sequences that already look like UTF-8 alone and converts every other high byte to UTF-8.
   *
   * <ul>
   * <li>Bytes that do not form a plausible UTF-8 sequence are converted one by one via "UTF8::to_utf8_convert()".</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded afterwards.</li>
   * <li>Html-entities are decoded only if $decodeHtmlEntityToUtf8 is true.</li>
   * <li>WARNING: invalid UTF-8 characters are not removed, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array (arrays are converted entry by entry).</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string (or array of strings).</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      // 7-bit byte: it doesn't need conversion
      if ($lead < "\x80") {
        $result .= $lead;
        ++$pos;

        continue;
      }

      // how many continuation bytes the lead byte announces (0 = cannot start a sequence)
      if ($lead < "\xC0") {
        $expected = 0; // stray continuation byte
      } elseif ($lead <= "\xDF") {
        $expected = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $expected = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $expected = 3; // looks like 4 bytes UTF8
      } else {
        $expected = 0; // doesn't look like UTF8 at all
      }

      // collect the continuation bytes; a missing byte counts as "\x00" and therefore fails the check
      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0;
      for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; ++$offset) {
        $next = $pos + $offset < $byteCount ? $str[$pos + $offset] : "\x00";

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
        } else {
          $sequence .= $next;
        }
      }

      if ($looksLikeUtf8) {
        // almost sure it's UTF8 already: copy the whole sequence
        $result .= $sequence;
        $pos += $expected + 1;
      } else {
        // not valid UTF8: convert the lead byte only and re-examine the following bytes
        $result .= self::to_utf8_convert($lead);
        ++$pos;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // optionally decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
7524
7525
  /**
   * Converts one single byte into its UTF-8 representation.
   *
   * The byte is looked up in the Windows-1252 table first; if it is not listed there,
   * a two-byte UTF-8 sequence is built from it.
   *
   * @param string $int <p>A single byte (despite its name this is a one-character string, as passed by "UTF8::to_utf8()").</p>
   *
   * @return string <p>The UTF-8 byte sequence for the given byte.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Build the two-byte sequence. The explicit (int) cast keeps the array key an integer:
    // "$ordC1 / 64" is a float for most values, and using a fractional float as array key
    // is an implicit, lossy conversion that PHP >= 8.1 reports as deprecated.
    $cc1 = self::$CHR[(int)($ordC1 / 64)] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
7558
7559
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * We could only use the native function if the string and the chars were pure 7-Bit ASCII,
   * but checking for that costs more time than we would save.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. If omitted (or empty), separator characters
   *                      (\pZ) and "other" characters such as control characters (\pC) are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but it is a perfectly valid character list,
    // so it must not fall back to the default whitespace pattern.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
7588
7589
  /**
   * Makes string's first char uppercase.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The first character in uppercase, followed by the untouched rest of the string.</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    // "substr" may report a failure with false, e.g. if there is no second character
    $remainder = self::substr($str, 1, null, $encoding);
    if ($remainder === false) {
      $remainder = '';
    }

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
7619
7620
  /**
   * Alias for "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string <p>Whatever "UTF8::ucfirst()" returns for the given input.</p>
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7635
7636
  /**
   * Uppercase the first character of all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against the empty string explicitly: "0" is falsy but a valid input
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function is only usable if neither a charlist nor exceptions are given
    // (explicit comparison, so that e.g. a charlist of "0" is not ignored)
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only; a part like "0" must be kept in the output
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
7704
7705
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (repeat until the string no longer changes).</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // turn "%uXXXX" escapes into hexadecimal html-entities, so the loop below can decode them
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($pattern, $str)) {
      $str = (string)\preg_replace($pattern, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $str_compare = $str;

      // "to_utf8" is declared as "string|string[]"; the input is always a string here,
      // the cast makes that explicit for the string-only "html_entity_decode" parameter
      $utf8 = (string)self::to_utf8($str);

      $str = self::fix_simple_utf8(
          \urldecode(
              self::html_entity_decode($utf8, $flags)
          )
      );

    } while ($multi_decode === true && $str_compare !== $str);

    return $str;
  }
7753
7754
  /**
   * Returns an array that maps "urlencoded"-win1252 bytes ("%20" to "%FF") to UTF-8 characters.
   *
   * Several codes ("%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0", "%AD") map to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return string[] <p>Percent-encoded byte as key, replacement character as value.</p>
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the euro sign in Windows-1252 (the backtick already belongs to "%60")
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
7990
7991
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Two-byte sequences whose code point is below 256 become a single byte; every other
   * multi-byte sequence (and a two-byte sequence that is cut off at the end of the input)
   * becomes "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>If true, the input (after the table replacement below) is returned
   *                              whenever the decoded result is not shorter than it.</p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the search list is the KEY set of the win1252-table. "to_utf8_convert()" indexes
    // that table by byte ordinal, so this presumably searches for decimal numbers ("128", ...) instead
    // of UTF-8 sequences - confirm against the data file before relying on this replacement.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // decode in place: $i is the read position, $j the (never larger) write position
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // a lead byte at the very end has no continuation byte: do not read past the string
          if ($i + 1 >= $len) {
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        // four-byte sequence: skip one byte more than for a three-byte sequence (intended fall-through)
        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          ++$i;
        case "\xE0":
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // cut off the bytes behind the write position
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
8072
8073
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The native "\utf8_encode()" does the conversion; if its result contains the byte "\xC2",
   * the win1252-table replacement is applied on top.
   *
   * NOTE(review): the search list of that replacement is the key set of the win1252-table, which
   * "to_utf8_convert()" indexes by byte ordinal - presumably not what was intended, please confirm.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The UTF-8 string, or an empty string for empty input or a failed conversion.</p>
   */
  public static function utf8_encode(string $str): string
  {
    static $searchCache = null;
    static $replaceCache = null;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte there is nothing the table replacement could apply to
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
8113
8114
  /**
   * Alias for "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
8127
8128
  /**
   * Returns the class-internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[] <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
8144
8145
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Appended to the result if the string was shortened.</p>
   *
   * @return string <p>The unchanged input if it fits into the limit, otherwise the first words plus $strAddOn.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ($str === '' || $limit < 1) {
      return '';
    }

    // leading whitespace followed by up to $limit "word + trailing whitespace" groups
    $matches = [];
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);
    $head = $matches[0] ?? null;

    // no match at all, or the match already covers the whole string: nothing to cut
    if ($head === null || self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
8176
8177
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is reduced to an ASCII mask with one byte per character (" " for a space, "?" for
   * any other character, "#" for an existing break), the mask is wrapped by the native
   * "\wordwrap()" and the break positions found in the mask are applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column (an empty string if $str or $break is empty).</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // build the list of characters and the matching one-byte-per-character mask
    $mask = '';
    $units = [];
    foreach (\explode($break, $str) as $index => $segment) {

      // segments were separated by an existing break
      if ($index > 0) {
        $units[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $units[] = $char;
        $mask .= $char === ' ' ? ' ' : '?';
      }
    }

    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $unitPos = 0;  // position in $units
    $maskPos = -1; // position in $mask
    $breakPos = -1;

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {

      // copy everything in front of the break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $units[$unitPos];
        unset($units[$unitPos]);
        ++$unitPos;
      }

      // the break replaces an existing break or a space; an inserted (cut) break replaces nothing
      if ($units[$unitPos] === $break || $units[$unitPos] === ' ') {
        unset($units[$unitPos]);
        ++$unitPos;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $units);
  }
8241
8242
  /**
   * Returns the class-internal list of Unicode White Space characters.
   *
   * @return string[] <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8251
8252
}
8253