Completed
Push — master ( 224446...a1c48e )
by Lars
02:37
created

UTF8::str_iends_with_any()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 20

Importance

Changes 0
Metric Value
dl 0
loc 14
ccs 0
cts 7
cp 0
rs 9.7998
c 0
b 0
f 0
cc 4
nc 4
nop 2
crap 20
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Creates a helper instance and triggers the environment support detection.
   */
  public function __construct()
  {
    // the class is final, so late static binding resolves to this very class
    static::checkForSupport();
  }
212
213
  /**
   * Fetches a single character from a UTF-8 string by its position, similar to
   * what $str[1] offers for single-byte strings.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single multi-byte character; an empty string if $str is empty
   *                or $pos is negative.</p>
   */
  public static function access(string $str, int $pos): string
  {
    $nothingToRead = ($str === '' || $pos < 0);
    if ($nothingToRead) {
      return '';
    }

    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233
234
  /**
   * Makes sure the given string starts with the UTF-8 BOM.
   *
   * INFO: A string that already carries a BOM is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the UTF-8 BOM if it had none.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Builds a copy of an array whose keys are all lower- or uppercased.
   *
   * @param array $array <p>The array to work on.</p>
   * @param int   $case  [optional] <p>Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default). Any other value is
   *                     treated like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array <p>The values of $array, stored under the converted keys.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything except CASE_UPPER (including unknown values) means "lowercase"
    $toUpper = ($case === CASE_UPPER);

    $result = [];
    foreach ($array as $originalKey => $item) {
      $convertedKey = $toUpper ? self::strtoupper($originalKey) : self::strtolower($originalKey);

      $result[$convertedKey] = $item;
    }

    return $result;
  }
284
285
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * An empty string is also returned when $end directly follows $start,
   * i.e. when there is nothing between the two delimiters.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $posStart = self::strpos($str, $start, $offset, $encoding);
    if ($posStart === false) {
      return '';
    }

    // the wanted substring begins right behind the start delimiter
    $substrIndex = $posStart + self::strlen($start, $encoding);
    $posEnd = self::strpos($str, $end, $substrIndex, $encoding);
    if (
        $posEnd === false
        ||
        $posEnd === $substrIndex
    ) {
      return '';
    }

    // substr() can hand back false; cast (as access() does) so the declared
    // string return type is honoured under strict_types
    return (string)self::substr($str, $substrIndex, $posEnd - $substrIndex, $encoding);
  }
317
318
  /**
   * Converts a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are converted to hexadecimal via base_convert() and then packed
   * into a byte string. Empty input and input with the numeric value zero
   * produce an empty string.
   *
   * @param mixed $bin <p>A string consisting of the characters 1|0.</p>
   *
   * @return string
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $convert = \base_convert($bin, 2, 16);
    if ($convert === '0') {
      return '';
    }

    // pack('H*') consumes two hex digits per byte and pads a dangling digit on
    // the low-nibble side ("a" would become "\xa0"); left-pad with a zero so
    // that e.g. "1010" yields "\x0a"
    if (\strlen($convert) % 2 !== 0) {
      $convert = '0' . $convert;
    }

    return \pack('H*', $convert);
  }
338
339
  /**
   * Provides the UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string <p>The three bytes of the UTF-8 Byte Order Mark.</p>
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
350
351
  /**
   * Alias of UTF8::chr_map(): forwards both arguments unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string whose characters are passed to the callback.</p>
   *
   * @return string[] <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
365
366
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $index <p>Position of the character.</p>
   *
   * @return string <p>The character at $index.</p>
   */
  public static function char_at(string $str, int $index): string
  {
    // substr() can hand back false; cast (as access() does) so the declared
    // string return type is honoured under strict_types
    return (string)self::substr($str, $index, 1);
  }
378
379
  /**
   * Breaks a string apart into its individual characters.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] <p>An array of chars.</p>
   */
  public static function chars(string $str): array
  {
    $pieces = self::str_split($str, 1);

    return $pieces;
  }
390
391
  /**
   * Detects once which UTF-8 related extensions / features the server offers
   * and stores the findings in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection only has to run once
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              && \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? \transliterator_list_ids()
        : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
427
428
  /**
   * Builds the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per code point / encoding combination. Code points up
   * to 127 are read from the "chr" data table, higher ones come from IntlChar
   * when it is available and are otherwise assembled byte by byte from the same
   * table. For every encoding other than UTF-8 the character is converted via
   * mb_convert_encoding() afterwards.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // memo: "<code point><encoding>" => character
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $needsMbstring = $encoding !== 'UTF-8'
                     && $encoding !== 'ISO-8859-1'
                     && $encoding !== 'WINDOWS-1252';
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $memoKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$memoKey]) === true) {
      return $CHAR_CACHE[$memoKey];
    }

    // use "simple"-char only until "\x80"
    $isSimpleChar = $code_point <= 127;

    if ($isSimpleChar === false && self::$SUPPORT['intlChar'] === true) {
      $character = \IntlChar::chr($code_point);
    } else {
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      // assemble the UTF-8 byte sequence from the single-byte lookup table
      if ($code_point <= 0x7F) {
        $character = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        $character = self::$CHR[($code_point >> 6) + 0xC0]
                     . self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $character = self::$CHR[($code_point >> 12) + 0xE0]
                     . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                     . self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $character = self::$CHR[($code_point >> 18) + 0xF0]
                     . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                     . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                     . self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $character = \mb_convert_encoding($character, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$memoKey] = $character;
  }
519
520
  /**
   * Runs a callback on every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
534
535
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] <p>An array of byte lengths of each character; empty for an empty string.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // self::$SUPPORT is read below, so make sure the detection has run
    // (same guard as in chr() and encode())
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    // with mbstring function overloading active the byte length is taken
    // via the 8-bit "CP850" encoding instead of the plain \strlen()
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \array_map(
          function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
566
567
  /**
   * Calculates the decimal code of a character from its UTF-8 bytes.
   *
   * The lead byte decides how many bytes are read (1 to 4); the payload bits
   * of the lead byte and of each following byte are then merged into one number.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    // 0xxxxxxx
    if (!($code & 0x80)) {
      return $code;
    }

    $byteCount = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $byteCount = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $byteCount = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $byteCount = 4;
      $code &= ~0xf0;
    }

    // 10xxxxxx
    $offset = 1;
    while ($offset < $byteCount) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
      $offset++;
    }

    return $code;
  }
605
606
  /**
   * Formats the code point of a UTF-8 encoded character as hexadecimal (U+xxxx).
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed over to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx; an empty string for empty input.</p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the "&#0;" entity is treated like an empty character
    $subject = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($subject), $pfix);
  }
626
627
  /**
   * Alias of UTF8::chr_to_decimal(): forwards the character unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
640
641
  /**
   * Cuts a string into chunks and glues them back together with the given line ending.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
654
655
  /**
   * Strips every byte that is not part of a well-formed UTF-8 sequence and
   * optionally applies further clean-up steps.
   *
   * Order of the steps: invalid bytes, diamond question marks, invisible
   * characters, whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of valid sequences; stray bytes land in the other
    // groups and vanish because only "$1" is written back
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($validSequences, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
715
716
  /**
   * Repairs simple encoding errors and afterwards runs UTF8::clean() with a
   * fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($str);

    return self::clean(
        $repaired,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (MS Word chars are left as they are)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
749
750
  /**
   * Translates a string (or a list of characters) into its Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return int[] <p>The array of code points (the results of UTF8::int_to_hex()
   *               when $u_style is set).</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a plain string is split first, an array is taken as it is
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $chars);

    if (!$u_style) {
      return $points;
    }

    return \array_map([self::class, 'int_to_hex'], $points);
  }
787
788
  /**
   * Squeezes every run of whitespace into one single space and trims the
   * result. This includes tabs and newline characters, as well as
   * multibyte whitespace such as the thin space and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String with a trimmed $str and condensed whitespace.</p>
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regexReplace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
803
804
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
817
818
  /**
   * Shortcut for UTF8::delimit() with a dash as delimiter: returns a lowercase
   * and trimmed string separated by dashes. Dashes are inserted before
   * uppercase characters (with the exception of the first character of the
   * string), and in place of spaces as well as underscores.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function dasherize(string $str): string
  {
    $dashed = self::delimit($str, '-');

    return $dashed;
  }
831
832
  /**
   * Turns an int-value into a UTF-8 character by decoding the matching
   * numeric HTML entity ("&#<int>;").
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $numericEntity = '&#' . $int . ';';

    return self::html_entity_decode($numericEntity, ENT_QUOTES | ENT_HTML5);
  }
843
844
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * Uppercase characters are recognised via the Unicode property "Lu", so
   * non-ASCII capitals such as "Ä" are separated as well.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every uppercase letter that does not sit at a word boundary with a
    // dash; \p{Lu} (instead of [A-Z]) also covers uppercase letters outside of ASCII
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // the dashes inserted above, as well as existing dashes, underscores
    // and whitespace, become the delimiter
    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
  }
866
867
  /**
   * Re-encodes a string into the given charset.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a UTF-8 string as well without messing the string up.
   *
   * The string is returned unchanged if it or $encoding is empty, if its current
   * encoding cannot be detected, if (without $force) it already has the target
   * encoding, or if the conversion yields an empty / falsy result.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $sourceEncoding = self::str_detect_encoding($str);

    // nothing to do: unknown source encoding, or already in the target encoding
    if ($sourceEncoding === false) {
      return $str;
    }
    if ($force !== true && $sourceEncoding === $encoding) {
      return $str;
    }

    // the dedicated converters are used when forced or when the source is
    // one of the three encodings they are meant for
    $useDedicatedConverter = $force === true
                             || \in_array($sourceEncoding, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($encoding === 'UTF-8' && $useDedicatedConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useDedicatedConverter) {
      return self::to_iso8859($str);
    }

    if (
        !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $sourceEncoding);

    // fall back to the untouched input when the conversion produced nothing usable
    return $converted ? $converted : $str;
  }
960
961
  /**
962
   * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
963
   *
964
   * @param string   $str                    <p>The input string.</p>
965
   * @param string   $search                 <p>The searched string.</p>
966
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
967
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
968
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
969
   *
970
   * @return string
971
   */
972
  public static function extractText(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
973
  {
974
    // init
975
    $text = $str;
976
977
    if (empty($text)) {
978
      return '';
979
    }
980
981
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
982
983
    if ($length === null) {
984
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
985
    }
986
987
    if (empty($search)) {
988
989
      $stringLength = self::strlen($text, $encoding);
990
991
      if ($length > 0) {
992
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
993
      } else {
994
        $end = 0;
995
      }
996
997
      $pos = \min(
998
          self::strpos($text, ' ', $end, $encoding),
999
          self::strpos($text, '.', $end, $encoding)
1000
      );
1001
1002
      if ($pos) {
1003
        return \rtrim(
1004
                   self::substr($text, 0, $pos, $encoding),
1005
                   $trimChars
1006
               ) . $replacerForSkippedText;
1007
      }
1008
1009
      return $text;
1010
    }
1011
1012
    $wordPos = self::stripos(
1013
        $text,
1014
        $search,
1015
        0,
1016
        $encoding
1017
    );
1018
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);
1019
1020
    if ($halfSide > 0) {
1021
1022
      $halfText = self::substr($text, 0, $halfSide, $encoding);
1023
      $pos_start = \max(
1024
          self::strrpos($halfText, ' ', 0, $encoding),
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1022 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1025
          self::strrpos($halfText, '.', 0, $encoding)
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1022 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1026
      );
1027
1028
      if (!$pos_start) {
1029
        $pos_start = 0;
1030
      }
1031
1032
    } else {
1033
      $pos_start = 0;
1034
    }
1035
1036
    if ($wordPos && $halfSide > 0) {
1037
      $l = $pos_start + $length - 1;
1038
      $realLength = self::strlen($text, $encoding);
1039
1040
      if ($l > $realLength) {
1041
        $l = $realLength;
1042
      }
1043
1044
      $pos_end = \min(
1045
                     self::strpos($text, ' ', $l, $encoding),
1046
                     self::strpos($text, '.', $l, $encoding)
1047
                 ) - $pos_start;
1048
1049
      if (!$pos_end || $pos_end <= 0) {
1050
        $extract = $replacerForSkippedText . \ltrim(
1051
                self::substr(
1052
                    $text,
1053
                    $pos_start,
1054
                    self::strlen($text),
1055
                    $encoding
1056
                ),
1057
                $trimChars
1058
            );
1059 View Code Duplication
      } else {
1060
        $extract = $replacerForSkippedText . \trim(
1061
                self::substr(
1062
                    $text,
1063
                    $pos_start,
1064
                    $pos_end,
1065
                    $encoding
1066
                ),
1067
                $trimChars
1068
            ) . $replacerForSkippedText;
1069
      }
1070
1071
    } else {
1072
1073
      $l = $length - 1;
1074
      $trueLength = self::strlen($text, $encoding);
1075
1076
      if ($l > $trueLength) {
1077
        $l = $trueLength;
1078
      }
1079
1080
      $pos_end = \min(
1081
          self::strpos($text, ' ', $l, $encoding),
1082
          self::strpos($text, '.', $l, $encoding)
1083
      );
1084
1085 View Code Duplication
      if ($pos_end) {
1086
        $extract = \rtrim(
1087
                       self::substr($text, 0, $pos_end, $encoding),
1088
                       $trimChars
1089
                   ) . $replacerForSkippedText;
1090
      } else {
1091
        $extract = $text;
1092
      }
1093
    }
1094
1095
    return $extract;
1096
  }
1097
1098
  /**
1099
   * Reads entire file into a string.
1100
   *
1101
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1102
   *
1103
   * @link http://php.net/manual/en/function.file-get-contents.php
1104
   *
1105
   * @param string        $filename         <p>
1106
   *                                        Name of the file to read.
1107
   *                                        </p>
1108
   * @param bool          $use_include_path [optional] <p>
1109
   *                                        Prior to PHP 5, this parameter is called
1110
   *                                        use_include_path and is a bool.
1111
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1112
   *                                        to trigger include path
1113
   *                                        search.
1114
   *                                        </p>
1115
   * @param resource|null $context          [optional] <p>
1116
   *                                        A valid context resource created with
1117
   *                                        stream_context_create. If you don't need to use a
1118
   *                                        custom context, you can skip this parameter by &null;.
1119
   *                                        </p>
1120
   * @param int|null      $offset           [optional] <p>
1121
   *                                        The offset where the reading starts.
1122
   *                                        </p>
1123
   * @param int|null      $maxLength        [optional] <p>
1124
   *                                        Maximum length of data read. The default is to read until end
1125
   *                                        of file is reached.
1126
   *                                        </p>
1127
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1128
   *
1129
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
1130
   *                                        images or pdf, because they used non default utf-8 chars.</p>
1131
   *
1132
   * @return string|false <p>The function returns the read data or false on failure.</p>
1133
   */
1134 6
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // Run the given path through PHP's string sanitizer before using it.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a stream context when the caller did not bring one along;
    // it carries nothing but the HTTP timeout.
    if ($timeout && $context === null) {
      $httpOptions = ['timeout' => $timeout];
      $context = \stream_context_create(['http' => $httpOptions]);
    }

    $offset = $offset ?? 0;

    // The native function must not receive "null" as length, so the fifth
    // argument is only passed when a real integer was given.
    $data = \is_int($maxLength)
        ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
        : \file_get_contents($filename, $use_include_path, $context, $offset);

    // Reading failed: hand the failure through unchanged.
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Convert everything that is not binary; UTF-16 / UTF-32 content is
    // converted as well, even when the binary check flags it.
    $needsConversion = self::is_binary($data, true) !== true
                       || self::is_utf16($data) !== false
                       || self::is_utf32($data) !== false;

    if ($needsConversion) {
      $data = self::cleanup(
          self::encode('UTF-8', $data, false)
      );
    }

    return $data;
  }
1181
1182
  /**
1183
   * Returns whether or not a character exists at an index. Offsets may be
1184
   * negative to count from the last character in the string. Implements
1185
   * part of the ArrayAccess interface.
1186
   *
1187
   * @param int    $offset   <p>The index to check.</p>
1188
   * @param string $str      <p>The input string.</p>
1189
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
1190
   *
1191
   *
1192
   * @return boolean <p>Whether or not the index exists.</p>
1193
   */
1194
  public static function offset_exists($offset, string $str, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);
    $index = (int)$offset;

    // Non-negative indexes must lie strictly below the length; negative ones
    // count from the end, so their magnitude may equal the length.
    return $index >= 0
        ? $charCount > $index
        : $charCount >= \abs($index);
  }
1206
1207
  /**
1208
   * Checks if a file starts with BOM (Byte Order Mark) character.
1209
   *
1210
   * @param string $file_path <p>Path to a valid file.</p>
1211
   *
1212
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1213
   */
1214 1
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // \file_get_contents() returns false (and raises a warning) when the file
    // cannot be read. A file without readable content has no detectable BOM,
    // so report false instead of passing a non-string on to the BOM check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1218
1219
  /**
1220
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1221
   *
1222
   * @param mixed  $var
1223
   * @param int    $normalization_form
1224
   * @param string $leading_combining
1225
   *
1226
   * @return mixed
1227
   */
1228 9
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    // Arrays: filter every element recursively, keys stay as they are.
    if ($type === 'array') {
      foreach ($var as $key => $item) {
        $var[$key] = self::filter($item, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every property that foreach can reach.
    if ($type === 'object') {
      foreach ($var as $name => $item) {
        $var->{$name} = self::filter($item, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Any other non-string value is handed back untouched.
    if ($type !== 'string') {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (\strpos($var, "\r") !== false) {
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Nothing more is done for strings that pass the ASCII check.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder only: it just has to satisfy the isset($n[0]) check below.
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized string when there is one, otherwise fall back to
      // a forced re-encode to UTF-8.
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && \preg_match('/^\p{Mn}/u', $var);

    // Prevent leading combining chars for NFC-safe concatenations.
    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1282
1283
  /**
1284
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1285
   *
1286
   * Gets a specific external variable by name and optionally filters it
1287
   *
1288
   * @link  http://php.net/manual/en/function.filter-input.php
1289
   *
1290
   * @param int    $type          <p>
1291
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1292
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1293
   *                              <b>INPUT_ENV</b>.
1294
   *                              </p>
1295
   * @param string $variable_name <p>
1296
   *                              Name of a variable to get.
1297
   *                              </p>
1298
   * @param int    $filter        [optional] <p>
1299
   *                              The ID of the filter to apply. The
1300
   *                              manual page lists the available filters.
1301
   *                              </p>
1302
   * @param mixed  $options       [optional] <p>
1303
   *                              Associative array of options or bitwise disjunction of flags. If filter
1304
   *                              accepts options, flags can be provided in "flags" field of array.
1305
   *                              </p>
1306
   *
1307
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1308
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1309
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1310
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1311
   * @since 5.2.0
1312
   */
1313 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a fourth
    // argument, so the native function keeps its own default otherwise.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1323
1324
  /**
1325
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1326
   *
1327
   * Gets external variables and optionally filters them
1328
   *
1329
   * @link  http://php.net/manual/en/function.filter-input-array.php
1330
   *
1331
   * @param int   $type       <p>
1332
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1333
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1334
   *                          <b>INPUT_ENV</b>.
1335
   *                          </p>
1336
   * @param mixed $definition [optional] <p>
1337
   *                          An array defining the arguments. A valid key is a string
1338
   *                          containing a variable name and a valid value is either a filter type, or an array
1339
   *                          optionally specifying the filter, flags and options. If the value is an
1340
   *                          array, valid keys are filter which specifies the
1341
   *                          filter type,
1342
   *                          flags which specifies any flags that apply to the
1343
   *                          filter, and options which specifies any options that
1344
   *                          apply to the filter. See the example below for a better understanding.
1345
   *                          </p>
1346
   *                          <p>
1347
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1348
   *                          input array are filtered by this filter.
1349
   *                          </p>
1350
   * @param bool  $add_empty  [optional] <p>
1351
   *                          Add missing keys as <b>NULL</b> to the return value.
1352
   *                          </p>
1353
   *
1354
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1355
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1356
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1357
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1358
   * fails.
1359
   * @since 5.2.0
1360
   */
1361 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise both
    // optional arguments are forwarded as given.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1371
1372
  /**
1373
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1374
   *
1375
   * Filters a variable with a specified filter
1376
   *
1377
   * @link  http://php.net/manual/en/function.filter-var.php
1378
   *
1379
   * @param mixed $variable <p>
1380
   *                        Value to filter.
1381
   *                        </p>
1382
   * @param int   $filter   [optional] <p>
1383
   *                        The ID of the filter to apply. The
1384
   *                        manual page lists the available filters.
1385
   *                        </p>
1386
   * @param mixed $options  [optional] <p>
1387
   *                        Associative array of options or bitwise disjunction of flags. If filter
1388
   *                        accepts options, flags can be provided in "flags" field of array. For
1389
   *                        the "callback" filter, callable type should be passed. The
1390
   *                        callback must accept one argument, the value to be filtered, and return
1391
   *                        the value after filtering/sanitizing it.
1392
   *                        </p>
1393
   *                        <p>
1394
   *                        <code>
1395
   *                        // for filters that accept options, use this format
1396
   *                        $options = array(
1397
   *                        'options' => array(
1398
   *                        'default' => 3, // value to return if the filter fails
1399
   *                        // other options here
1400
   *                        'min_range' => 0
1401
   *                        ),
1402
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1403
   *                        );
1404
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1405
   *                        // for filter that only accept flags, you can pass them directly
1406
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1407
   *                        // for filter that only accept flags, you can also pass as an array
1408
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1409
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1410
   *                        // callback validate filter
1411
   *                        function foo($value)
1412
   *                        {
1413
   *                        // Expected format: Surname, GivenNames
1414
   *                        if (strpos($value, ", ") === false) return false;
1415
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1416
   *                        $empty = (empty($surname) || empty($givennames));
1417
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1418
   *                        if ($empty || $notstrings) {
1419
   *                        return false;
1420
   *                        } else {
1421
   *                        return $value;
1422
   *                        }
1423
   *                        }
1424
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1425
   *                        </code>
1426
   *                        </p>
1427
   *
1428
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1429
   * @since 5.2.0
1430
   */
1431 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied a third
    // argument, so the native function keeps its own default otherwise.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1441
1442
  /**
1443
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1444
   *
1445
   * Gets multiple variables and optionally filters them
1446
   *
1447
   * @link  http://php.net/manual/en/function.filter-var-array.php
1448
   *
1449
   * @param array $data       <p>
1450
   *                          An array with string keys containing the data to filter.
1451
   *                          </p>
1452
   * @param mixed $definition [optional] <p>
1453
   *                          An array defining the arguments. A valid key is a string
1454
   *                          containing a variable name and a valid value is either a
1455
   *                          filter type, or an
1456
   *                          array optionally specifying the filter, flags and options.
1457
   *                          If the value is an array, valid keys are filter
1458
   *                          which specifies the filter type,
1459
   *                          flags which specifies any flags that apply to the
1460
   *                          filter, and options which specifies any options that
1461
   *                          apply to the filter. See the example below for a better understanding.
1462
   *                          </p>
1463
   *                          <p>
1464
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1465
   *                          input array are filtered by this filter.
1466
   *                          </p>
1467
   * @param bool  $add_empty  [optional] <p>
1468
   *                          Add missing keys as <b>NULL</b> to the return value.
1469
   *                          </p>
1470
   *
1471
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1472
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1473
   * the variable is not set.
1474
   * @since 5.2.0
1475
   */
1476 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise both
    // optional arguments are forwarded as given.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1486
1487
  /**
1488
   * Returns the first $n characters of the string.
1489
   *
1490
   * @param string $str      <p>The input string.</p>
1491
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1492
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
1493
   *
1494
   * @return string
1495
   */
1496
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Nothing to take for a zero or negative count.
    if ($n <= 0) {
      return '';
    }

    // substr() may signal failure with false; with strict types that would
    // violate the declared string return type, so a failure becomes ''.
    return (string)self::substr($str, 0, $n, $encoding);
  }
1504
1505
  /**
1506
   * Check if the number of unicode characters is not more than the specified integer.
1507
   *
1508
   * @param string $str      The original string to be checked.
1509
   * @param int    $box_size The size in number of chars to be checked against string.
1510
   *
1511
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1512
   */
1513 1
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length is measured with strlen()'s default encoding.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1517
1518
  /**
1519
   * Try to fix simple broken UTF-8 strings.
1520
   *
1521
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1522
   *
1523
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1524
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1525
   * See: http://en.wikipedia.org/wiki/Windows-1252
1526
   *
1527
   * @param string $str <p>The input string</p>
1528
   *
1529
   * @return string
1530
   */
1531 29 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search / replace lists, built once per request from the fix table.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // Load the fix table on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1552
1553
  /**
1554
   * Fix a double (or multiple) encoded UTF8 string.
1555
   *
1556
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1557
   *
1558
   * @return string|string[] <p>Will return the fixed input-"array" or
1559
   *                         the fixed input-"string".</p>
1560
   */
1561 1
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively); keys are preserved.
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    // Decode and re-encode until a pass no longer changes the string.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;

      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1581
1582
  /**
   * Get the text direction of a single character.
   *
   * If "IntlChar" support is flagged as available its direction code is used first;
   * for every code that is not listed below (or without "IntlChar") a built-in table
   * of right-to-left code point ranges decides.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      $ltrCodes = [0, 11, 12, 20];
      $rtlCodes = [1, 13, 14, 15, 21];

      if (\in_array($direction, $ltrCodes, true)) {
        return 'LTR';
      }

      if (\in_array($direction, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other direction code falls through to the range table below
    }

    $c = self::chr_to_decimal($char);

    // everything outside of [0x5be, 0x10b7f] is treated as left-to-right
    if (!($c >= 0x5be && $c <= 0x10b7f)) {
      return 'LTR';
    }

    if ($c <= 0x85e) {

      // lower block: 0x5be - 0x85e
      $isRtl = $c === 0x5be
               || $c === 0x5c0
               || $c === 0x5c3
               || $c === 0x5c6
               || ($c >= 0x5d0 && $c <= 0x5ea)
               || ($c >= 0x5f0 && $c <= 0x5f4)
               || $c === 0x608
               || $c === 0x60b
               || $c === 0x60d
               || $c === 0x61b
               || ($c >= 0x61e && $c <= 0x64a)
               || ($c >= 0x66d && $c <= 0x66f)
               || ($c >= 0x671 && $c <= 0x6d5)
               || ($c >= 0x6e5 && $c <= 0x6e6)
               || ($c >= 0x6ee && $c <= 0x6ef)
               || ($c >= 0x6fa && $c <= 0x70d)
               || $c === 0x710
               || ($c >= 0x712 && $c <= 0x72f)
               || ($c >= 0x74d && $c <= 0x7a5)
               || $c === 0x7b1
               || ($c >= 0x7c0 && $c <= 0x7ea)
               || ($c >= 0x7f4 && $c <= 0x7f5)
               || $c === 0x7fa
               || ($c >= 0x800 && $c <= 0x815)
               || $c === 0x81a
               || $c === 0x824
               || $c === 0x828
               || ($c >= 0x830 && $c <= 0x83e)
               || ($c >= 0x840 && $c <= 0x858)
               || $c === 0x85e;

    } elseif ($c === 0x200f) {

      // single code point that is always right-to-left
      $isRtl = true;

    } elseif ($c >= 0xfb1d) {

      // upper block: 0xfb1d - 0x10b7f
      $isRtl = $c === 0xfb1d
               || ($c >= 0xfb1f && $c <= 0xfb28)
               || ($c >= 0xfb2a && $c <= 0xfb36)
               || ($c >= 0xfb38 && $c <= 0xfb3c)
               || $c === 0xfb3e
               || ($c >= 0xfb40 && $c <= 0xfb41)
               || ($c >= 0xfb43 && $c <= 0xfb44)
               || ($c >= 0xfb46 && $c <= 0xfbc1)
               || ($c >= 0xfbd3 && $c <= 0xfd3d)
               || ($c >= 0xfd50 && $c <= 0xfd8f)
               || ($c >= 0xfd92 && $c <= 0xfdc7)
               || ($c >= 0xfdf0 && $c <= 0xfdfc)
               || ($c >= 0xfe70 && $c <= 0xfe74)
               || ($c >= 0xfe76 && $c <= 0xfefc)
               || ($c >= 0x10800 && $c <= 0x10805)
               || $c === 0x10808
               || ($c >= 0x1080a && $c <= 0x10835)
               || ($c >= 0x10837 && $c <= 0x10838)
               || $c === 0x1083c
               || ($c >= 0x1083f && $c <= 0x10855)
               || ($c >= 0x10857 && $c <= 0x1085f)
               || ($c >= 0x10900 && $c <= 0x1091b)
               || ($c >= 0x10920 && $c <= 0x10939)
               || $c === 0x1093f
               || $c === 0x10a00
               || ($c >= 0x10a10 && $c <= 0x10a13)
               || ($c >= 0x10a15 && $c <= 0x10a17)
               || ($c >= 0x10a19 && $c <= 0x10a33)
               || ($c >= 0x10a40 && $c <= 0x10a47)
               || ($c >= 0x10a50 && $c <= 0x10a58)
               || ($c >= 0x10a60 && $c <= 0x10a7f)
               || ($c >= 0x10b00 && $c <= 0x10b35)
               || ($c >= 0x10b40 && $c <= 0x10b55)
               || ($c >= 0x10b58 && $c <= 0x10b72)
               || ($c >= 0x10b78 && $c <= 0x10b7f);

    } else {

      // between the two blocks (except 0x200f) nothing is right-to-left
      $isRtl = false;

    }

    return $isRtl ? 'RTL' : 'LTR';
  }
1703
1704
  /**
   * Load a data set from "/data/*.php" (relative to this file).
   *
   * @param string $file <p>Name of the data file, without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Whatever the data file returns; false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1721
1722
  /**
   * Report which php-features were detected.
   *
   * The detection is triggered on first use, if it did not run already.
   *
   * @param string|null $key <p>Name of a single support-flag, or null for everything.</p>
   *
   * @return mixed <p>The full support-"array", if $key === null<br>
   *               the stored value, if $key is used and available<br>
   *               otherwise null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown keys (and keys that hold null) yield null
    return self::$SUPPORT[$key] ?? null;
  }
1747
1748
  /**
   * Build a random string out of a given set of characters.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The random string; an empty string if $possibleChars has no characters.</p>
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $result = '';
    $charCount = self::strlen($possibleChars, $encoding);

    if ($charCount === 0) {
      return '';
    }

    // pick one character per iteration
    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $index = \random_int(0, $charCount - 1);
      } catch (\Exception $e) {
        // fallback if "random_int()" throws
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $charCount - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1781
1782
  /**
   * Create a unique identifier string.
   *
   * The identifier is built via "uniqid()" from a random number, the session-id,
   * the remote- and server-address (if set) and the extra entropy.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    $seed = \mt_rand()
            . \session_id()
            . ($_SERVER['REMOTE_ADDR'] ?? '')
            . ($_SERVER['SERVER_ADDR'] ?? '')
            . $entropyExtra;

    $unique = \uniqid($seed, true);

    return $md5 ? \md5($unique . $seed) : $unique;
  }
1804
1805
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of "UTF8::string_has_bom()".</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1820
1821
  /**
   * Check if the string contains at least one lower case char.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>true if a lower case character was found, false otherwise.</p>
   */
  public static function has_lowercase(string $str): bool
  {
    $pattern = '.*[[:lower:]]';

    return self::matchesPattern($str, $pattern);
  }
1832
1833
  /**
   * Check if the string contains at least one upper case char.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>true if an upper case character was found, false otherwise.</p>
   */
  public static function has_uppercase(string $str): bool
  {
    $pattern = '.*[[:upper:]]';

    return self::matchesPattern($str, $pattern);
  }
1844
1845
  /**
   * Convert a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted via "hexdec()" and handed over to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1856
1857
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted notations (case-insensitive): "U+xxxx", "\uxxxx" or the plain hex digits,
   * each with 4 to 6 hexadecimal digits.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // Only real hex digits are allowed: the former class "[a-z0-9]" also matched
    // letters like "z", so e.g. "zzzz" was reported as code point 0 instead of a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1878
1879
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Passed through unchanged.</p>
   * @param string $encoding [optional] <p>Passed through unchanged.</p>
   *
   * @return string <p>The result of "UTF8::html_entity_decode()".</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1894
1895
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>HTML numbered entities; an empty string for empty input.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // with $keepAsciiChars only code points >= 0x80 are converted
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // The convmap is a list of 4-tuples [start, end, offset, mask]. It used to carry
      // a stray fifth element, which PHP >= 8.0 rejects ("must have a multiple of 4 elements").
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // fallback without "mbstring": encode char by char
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1941
1942
  /**
   * UTF-8 version of html_entity_decode()
   *
   * Convert all HTML entities to their applicable characters.
   *
   * The native html_entity_decode() is not used on its own, because it does not
   * convert numeric entities that lack the trailing semicolon, although most browsers
   * still interpret them. Here the missing semicolon is added before decoding, and
   * the decoding is repeated until the string does not change anymore.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
   *                         which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
   *                         <ul>
   *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                         </ul>
   *                         </p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The decoded string.</p>
   */
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // shorter than four bytes: returned as it is (examples: &; || &x;)
    if (!isset($str[3])) {
      return $str;
    }

    // an entity needs an "&" together with "&#" or ";", otherwise there is nothing to decode
    $mayContainEntity = \strpos($str, '&') !== false
                        &&
                        (
                            \strpos($str, '&#') !== false
                            ||
                            \strpos($str, ';') !== false
                        );
    if (!$mayContainEntity) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $flags = $flags ?? (ENT_QUOTES | ENT_HTML5);

    if (
        !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    do {
      $before = $str;

      // decimal entities with semicolon
      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($hit) use ($encoding) {
            $decoded = \mb_convert_encoding($hit[0], $encoding, 'HTML-ENTITIES');

            // quotes stay encoded at this point
            // NOTE(review): presumably so that the $flags of the call below decide about them - confirm
            if ($decoded === '"' || $decoded === "'") {
              return $hit[0];
            }

            return $decoded;
          },
          $str
      );

      // add the missing semicolon to numeric entities (hex and decimal) ...
      $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities
      $str = \html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2081
2082
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * After the native htmlentities() call, backslashes are replaced by "&#92;" and the
   * result is passed through UTF8::html_encode() with ASCII chars kept.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>The input string.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type. The default is ENT_COMPAT.
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
   *                              returning an empty string. Using this flag is discouraged as it may have
   *                              security implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
   *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                              returning an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
   *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise)
   *                              instead of leaving them as is. This may be useful, for instance, to ensure the
   *                              well-formedness of XML documents with embedded external content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Defines the encoding used in the conversion. Although this argument is
   *                              technically optional, you are highly encouraged to specify the correct
   *                              value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string.<br>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP doesn't replace a backslash by its html entity, since the backslash is
     * mostly used to escape characters when inserting into a database. With a
     * decent database layer this is not needed, so backslashes are replaced by
     * their html entity equivalent here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    return self::html_encode($encoded, true, $encoding);
  }
2205
2206
  /**
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
   *
   * INFO: Take a look at "UTF8::htmlentities()"
   *
   * @link http://php.net/manual/en/function.htmlspecialchars.php
   *
   * @param string $str           <p>The string being converted.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type. The default is ENT_COMPAT.
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
   *                              returning an empty string. Using this flag is discouraged as it may have
   *                              security implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
   *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                              returning an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
   *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise)
   *                              instead of leaving them as is. This may be useful, for instance, to ensure the
   *                              well-formedness of XML documents with embedded external content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Defines encoding used in conversion.
   *                              </p>
   *                              <p>
   *                              For the purposes of this function, the encodings
   *                              ISO-8859-1, ISO-8859-15, UTF-8, cp866, cp1251, cp1252, and
   *                              KOI8-R are effectively equivalent, provided the
   *                              <i>string</i> itself is valid for the encoding, as
   *                              the characters affected by <b>htmlspecialchars</b> occupy
   *                              the same positions in all of these encodings.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities, the default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The converted string.<br>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as they are, everything else gets normalized first
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2323
2324
  /**
2325
   * Checks whether iconv is available on the server.
2326
   *
2327
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2328
   */
2329 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so compare it directly.
    $hasIconv = \extension_loaded('iconv');

    return $hasIconv === true;
  }
2333
2334
  /**
2335
   * alias for "UTF8::decimal_to_chr()"
2336
   *
2337
   * @see UTF8::decimal_to_chr()
2338
   *
2339
   * @param mixed $int
2340
   *
2341
   * @return string
2342
   */
2343 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the work is done by decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2347
2348
  /**
2349
   * Converts Integer to hexadecimal U+xxxx code point representation.
2350
   *
2351
   * INFO: opposite to UTF8::hex_to_int()
2352
   *
2353
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2354
   * @param string $pfix [optional]
2355
   *
2356
   * @return string <p>The code point, or empty string on failure.</p>
2357
   */
2358 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad with zeros so the hex part is at least four digits wide;
    // longer values are kept as they are.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2366
2367
  /**
2368
   * Checks whether intl-char is available on the server.
2369
   *
2370
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2371
   */
2372 1
  public static function intlChar_loaded(): bool
  {
    // The check is simply whether the IntlChar class exists.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2376
2377
  /**
2378
   * Checks whether intl is available on the server.
2379
   *
2380
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2381
   */
2382 4
  public static function intl_loaded(): bool
  {
    // The check is simply whether the "intl" extension is loaded.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2386
2387
  /**
2388
   * alias for "UTF8::is_ascii()"
2389
   *
2390
   * @see        UTF8::is_ascii()
2391
   *
2392
   * @param string $str
2393
   *
2394
   * @return boolean
2395
   *
2396
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2397
   */
2398 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2402
2403
  /**
2404
   * alias for "UTF8::is_base64()"
2405
   *
2406
   * @see        UTF8::is_base64()
2407
   *
2408
   * @param string $str
2409
   *
2410
   * @return bool
2411
   *
2412
   * @deprecated <p>use "UTF8::is_base64()"</p>
2413
   */
2414 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2418
2419
  /**
2420
   * alias for "UTF8::is_binary()"
2421
   *
2422
   * @see        UTF8::is_binary()
2423
   *
2424
   * @param mixed $str
2425
   * @param bool  $strict
2426
   *
2427
   * @return bool
2428
   *
2429
   * @deprecated <p>use "UTF8::is_binary()"</p>
2430
   */
2431 2
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias; forwards both arguments to is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
  }
2435
2436
  /**
2437
   * alias for "UTF8::is_bom()"
2438
   *
2439
   * @see        UTF8::is_bom()
2440
   *
2441
   * @param string $utf8_chr
2442
   *
2443
   * @return boolean
2444
   *
2445
   * @deprecated <p>use "UTF8::is_bom()"</p>
2446
   */
2447 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias; forwards to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2451
2452
  /**
2453
   * alias for "UTF8::is_html()"
2454
   *
2455
   * @see        UTF8::is_html()
2456
   *
2457
   * @param string $str
2458
   *
2459
   * @return boolean
2460
   *
2461
   * @deprecated <p>use "UTF8::is_html()"</p>
2462
   */
2463 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_html().
    $result = self::is_html($str);

    return $result;
  }
2467
2468
  /**
2469
   * alias for "UTF8::is_json()"
2470
   *
2471
   * @see        UTF8::is_json()
2472
   *
2473
   * @param string $str
2474
   *
2475
   * @return bool
2476
   *
2477
   * @deprecated <p>use "UTF8::is_json()"</p>
2478
   */
2479
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias; forwards to is_json().
    $result = self::is_json($str);

    return $result;
  }
2483
2484
  /**
2485
   * alias for "UTF8::is_utf16()"
2486
   *
2487
   * @see        UTF8::is_utf16()
2488
   *
2489
   * @param string $str
2490
   *
2491
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2492
   *
2493
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2494
   */
2495 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias; returns whatever is_utf16() returns.
    $detected = self::is_utf16($str);

    return $detected;
  }
2499
2500
  /**
2501
   * alias for "UTF8::is_utf32()"
2502
   *
2503
   * @see        UTF8::is_utf32()
2504
   *
2505
   * @param string $str
2506
   *
2507
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2508
   *
2509
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2510
   */
2511 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias; returns whatever is_utf32() returns.
    $detected = self::is_utf32($str);

    return $detected;
  }
2515
2516
  /**
2517
   * alias for "UTF8::is_utf8()"
2518
   *
2519
   * @see        UTF8::is_utf8()
2520
   *
2521
   * @param string $str
2522
   * @param bool   $strict
2523
   *
2524
   * @return bool
2525
   *
2526
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2527
   */
2528 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias; forwards both arguments to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2532
2533
  /**
2534
   * Returns true if the string contains only alphabetic chars, false otherwise.
2535
   *
2536
   * @param string $str
2537
   *
2538
   * @return bool <p>Whether or not $str contains only alphabetic chars.</p>
2539
   */
2540
  public static function is_alpha(string $str): bool
  {
    // Anchored character-class pattern; "*" means the empty string is accepted by the pattern.
    $pattern = '^[[:alpha:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2544
2545
  /**
2546
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2547
   *
2548
   * @param string $str
2549
   *
2550
   * @return bool <p>Whether or not $str contains only alphanumeric chars.</p>
2551
   */
2552
  public static function is_alphanumeric(string $str): bool
  {
    // Anchored character-class pattern; "*" means the empty string is accepted by the pattern.
    $pattern = '^[[:alnum:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2556
2557
  /**
2558
   * Checks if a string is 7 bit ASCII.
2559
   *
2560
   * @param string $str <p>The string to check.</p>
2561
   *
2562
   * @return bool <p>
2563
   *              <strong>true</strong> if it is ASCII<br>
2564
   *              <strong>false</strong> otherwise
2565
   *              </p>
2566
   */
2567 58
  public static function is_ascii(string $str): bool
  {
    // The empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable range 0x20-0x7E.
    // NOTE(review): "\x10" and "\x13" look like decimal 10/13 written as hex
    // (LF/CR are already listed as \x0A/\x0D) - confirm before changing.
    $hasOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasOtherByte;
  }
2575
2576
  /**
2577
   * Returns true if the string is base64 encoded, false otherwise.
2578
   *
2579
   * @param string $str <p>The input string.</p>
2580
   *
2581
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2582
   */
2583 1
  public static function is_base64(string $str): bool
  {
    // An empty input is never reported as base64 (unchanged behaviour).
    if ($str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false when the input contains
    // characters outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Compare explicitly instead of relying on truthiness: the decoded payload
    // "0" (encoded as "MA==") is falsy in PHP and used to be rejected.
    return \base64_encode($decoded) === $str;
  }
2589
2590
  /**
2591
   * Check if the input is binary... (this looks like a hack).
2592
   *
2593
   * @param mixed $input
2594
   * @param bool  $strict
2595
   *
2596
   * @return bool
2597
   */
2598 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $bytes = (string)$input;
    if ($bytes === '') {
      return false;
    }

    // A string made up solely of "0" and "1" characters counts as binary.
    if (\preg_match('~^[01]+$~', $bytes)) {
      return true;
    }

    // More than 30% NUL bytes => binary. $bytes is non-empty here, so the
    // division is safe.
    $nulCount = \substr_count($bytes, "\x0");
    if (($nulCount / \strlen($bytes)) > 0.3) {
      return true;
    }

    // In strict mode, when finfo exists, its verdict is final.
    if ($strict === true && \class_exists('finfo')) {
      $detector = new \finfo(FILEINFO_MIME_ENCODING);

      return $detector->buffer($bytes) === 'binary';
    }

    // Otherwise any NUL byte at all marks the input as binary.
    return $nulCount > 0;
  }
2638
2639
  /**
2640
   * Check if the file is binary.
2641
   *
2642
   * @param string $file
2643
   *
2644
   * @return boolean
2645
   */
2646 3
  public static function is_binary_file($file): bool
  {
    // Only the first 512 bytes are inspected; an unreadable file is treated
    // like an empty one.
    $block = '';

    try {
      // fopen() signals failure by returning false (plus a warning), not by
      // throwing, so the handle must be checked before it is used.
      $fp = \fopen($file, 'rb');
      if ($fp !== false) {
        try {
          $chunk = \fread($fp, 512);
          if ($chunk !== false) {
            $block = $chunk;
          }
        } finally {
          // Always release the handle, even if reading fails.
          \fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // Kept for setups where an error handler turns warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block, true);
  }
2658
2659
  /**
2660
   * Returns true if the string contains only whitespace chars, false otherwise.
2661
   *
2662
   * @param string $str
2663
   *
2664
   * @return bool <p>Whether or not $str contains only whitespace characters.</p>
2665
   */
2666
  public static function is_blank(string $str): bool
  {
    // Anchored character-class pattern; "*" means the empty string is accepted by the pattern.
    $pattern = '^[[:space:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2670
2671
  /**
2672
   * Checks if the given string is equal to any "Byte Order Mark".
2673
   *
2674
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2675
   *
2676
   * @param string $str <p>The input string.</p>
2677
   *
2678
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2679
   */
2680 1
  public static function is_bom($str): bool
  {
    // The BOM strings are the keys of self::$BOM; the byte lengths stored as
    // values play no role here. The comparison is strict (type and value).
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2690
2691
  /**
2692
   * Determine whether the string is considered to be empty.
2693
   *
2694
   * A variable is considered empty if it does not exist or if its value equals FALSE.
2695
   * empty() does not generate a warning if the variable does not exist.
2696
   *
2697
   * @param mixed $str
2698
   *
2699
   * @return bool <p>Whether or not $str is empty().</p>
2700
   */
2701
  public static function is_empty($str): bool
  {
    // For a variable that is set, empty() is the same as a falsy boolean cast.
    return (bool)$str === false;
  }
2705
2706
  /**
2707
   * Returns true if the string contains only hexadecimal chars, false otherwise.
2708
   *
2709
   * @param string $str
2710
   *
2711
   * @return bool <p>Whether or not $str contains only hexadecimal chars.</p>
2712
   */
2713
  public static function is_hexadecimal(string $str): bool
  {
    // Anchored character-class pattern; "*" means the empty string is accepted by the pattern.
    $pattern = '^[[:xdigit:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2717
2718
  /**
2719
   * Check if the string contains any html-tags <lall>.
2720
   *
2721
   * @param string $str <p>The input string.</p>
2722
   *
2723
   * @return boolean
2724
   */
2725 1
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Matches an opening, closing or self-closing tag, optionally with
    // quoted or unquoted attributes.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // Exactly one match means "found"; 0 (no match) and false (error) both mean "not HTML".
    return \preg_match($tagPattern, $str) === 1;
  }
2738
2739
  /**
2740
   * Try to check if "$str" is a JSON string.
2741
   *
2742
   * @param string $str <p>The input string.</p>
2743
   *
2744
   * @return bool
2745
   */
2746 1
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only objects and arrays count; decoded scalars and null are rejected.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    return \json_last_error() === JSON_ERROR_NONE;
  }
2762
2763
  /**
2764
   * @param string $str
2765
   *
2766
   * @return bool
2767
   */
2768
  public static function is_lowercase(string $str): bool
  {
    // Anchored character-class pattern; "*" means the empty string is accepted by the pattern.
    $pattern = '^[[:lower:]]*$';

    return (bool)self::matchesPattern($str, $pattern);
  }
2776
2777
  /**
2778
   * Returns true if the string is serialized, false otherwise.
2779
   *
2780
   * @param string $str
2781
   *
2782
   * @return bool <p>Whether or not $str is serialized.</p>
2783
   */
2784
  public static function is_serialized(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, so that one payload is recognised up front.
    if ($str === 'b:0;') {
      return true;
    }

    // This is only a probe, so no object may be instantiated from the input:
    // with "allowed_classes => false" objects become __PHP_Incomplete_Class
    // and no __wakeup()/__destruct() of real classes can run. The boolean
    // result is unaffected. "@" stays because unserialize() emits a notice
    // for every non-serialized input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2796
2797
  /**
2798
   * Returns true if the string contains only upper case chars, false
2799
   * otherwise.
2800
   *
2801
   * @param string $str <p>The input string.</p>
2802
   *
2803
   * @return bool <p>Whether or not $str contains only upper case characters.</p>
2804
   */
2805
  public static function is_uppercase(string $str): bool
  {
    // Anchored character-class pattern; "*" means the empty string is accepted by the pattern.
    $pattern = '^[[:upper:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2809
2810
  /**
2811
   * Check if the string is UTF-16.
2812
   *
2813
   * @param string $str <p>The input string.</p>
2814
   *
2815
   * @return int|false <p>
2816
   *                   <strong>false</strong> if it's not UTF-16,<br>
2817
   *                   <strong>1</strong> for UTF-16LE,<br>
2818
   *                   <strong>2</strong> for UTF-16BE.
2819
   *                   </p>
2820
   */
2821 10 View Code Duplication
  public static function is_utf16(string $str)
  {
    return self::utf_endianness_by_round_trip($str, 'UTF-16LE', 'UTF-16BE');
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32(string $str)
  {
    return self::utf_endianness_by_round_trip($str, 'UTF-32LE', 'UTF-32BE');
  }

  /**
   * Shared implementation of is_utf16() / is_utf32().
   *
   * The input must first pass self::is_binary(); its BOM is then removed. For each
   * candidate encoding the string is converted to UTF-8, back, and to UTF-8 again.
   * If that round trip is stable, every entry of self::count_chars() of the result
   * that is also found (strictly) in self::count_chars() of the input adds one point.
   * The candidate with more points wins; equal scores mean "not detected".
   *
   * NOTE(review): in_array() searches the *values* of self::count_chars($str) for the
   * *keys* of the converted string's count - whether that is the intended comparison
   * depends on the return shape of count_chars(); confirm before changing.
   *
   * @param string $str        <p>The input string.</p>
   * @param string $encodingLE <p>Little-endian candidate, e.g. "UTF-16LE".</p>
   * @param string $encodingBE <p>Big-endian candidate, e.g. "UTF-16BE".</p>
   *
   * @return int|false <p><strong>1</strong> for little-endian, <strong>2</strong> for big-endian,
   *                   <strong>false</strong> otherwise.</p>
   */
  private static function utf_endianness_by_round_trip(string $str, string $encodingLE, string $encodingBE)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input; computed at most once, on first use.
    $strChars = [];

    $scores = [];
    foreach ([$encodingLE, $encodingBE] as $candidate) {
      $score = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if ($asUtf8) {
        $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);

        if ($roundTrip === $asUtf8) {
          if (\count($strChars) === 0) {
            $strChars = self::count_chars($str, true);
          }

          foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
            if (\in_array($char, $strChars, true) === true) {
              $score++;
            }
          }
        }
      }

      $scores[] = $score;
    }

    $scoreLE = $scores[0];
    $scoreBE = $scores[1];

    if ($scoreLE === $scoreBE) {
      return false;
    }

    return $scoreLE > $scoreBE ? 1 : 2;
  }
2943
2944
  /**
2945
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2946
   *
2947
   * @see    http://hsivonen.iki.fi/php-utf8/
2948
   *
2949
   * @param string|string[] $str    <p>The string to be checked.</p>
2950
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2951
   *
2952
   * @return bool
2953
   */
2954 61
  public static function is_utf8($str, bool $strict = false): bool
  {
    // An array is valid only if every element is valid.
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // Nothing to check.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode additionally rejects input that is detected as UTF-16 / UTF-32.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() !== true) {
      // NOTE(review): this branch relies on the /u modifier although it is
      // taken when pcre_utf8_support() does *not* return true - confirm that
      // the condition is not inverted.
      //
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current character
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence: only a continuation octet is legal.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence: $mUcs4 holds the final code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence (for example a
    // lone "\xC3") is not valid UTF-8, so all expected octets must have been seen.
    return $mState === 0;
  }
3102
3103
  /**
3104
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3105
   * Decodes a JSON string
3106
   *
3107
   * @link http://php.net/manual/en/function.json-decode.php
3108
   *
3109
   * @param string $json    <p>
3110
   *                        The <i>json</i> string being decoded.
3111
   *                        </p>
3112
   *                        <p>
3113
   *                        This function only works with UTF-8 encoded strings.
3114
   *                        </p>
3115
   *                        <p>PHP implements a superset of
3116
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3117
   *                        only supports these values when they are nested inside an array or an object.
3118
   *                        </p>
3119
   * @param bool   $assoc   [optional] <p>
3120
   *                        When <b>TRUE</b>, returned objects will be converted into
3121
   *                        associative arrays.
3122
   *                        </p>
3123
   * @param int    $depth   [optional] <p>
3124
   *                        User specified recursion depth.
3125
   *                        </p>
3126
   * @param int    $options [optional] <p>
3127
   *                        Bitmask of JSON decode options. Currently only
3128
   *                        <b>JSON_BIGINT_AS_STRING</b>
3129
   *                        is supported (default is to cast large integers as floats)
3130
   *                        </p>
3131
   *
3132
   * @return mixed the value encoded in <i>json</i> in appropriate
3133
   * PHP type. Values true, false and
3134
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3135
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3136
   * <i>json</i> cannot be decoded or if the encoded
3137
   * data is deeper than the recursion limit.
3138
   */
3139 2
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // The input is passed through self::filter() before PHP's decoder sees it.
    $filtered = self::filter($json);

    return \json_decode($filtered, $assoc, $depth, $options);
  }
3147
3148
  /**
3149
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3150
   * Returns the JSON representation of a value.
3151
   *
3152
   * @link http://php.net/manual/en/function.json-encode.php
3153
   *
3154
   * @param mixed $value   <p>
3155
   *                       The <i>value</i> being encoded. Can be any type except
3156
   *                       a resource.
3157
   *                       </p>
3158
   *                       <p>
3159
   *                       All string data must be UTF-8 encoded.
3160
   *                       </p>
3161
   *                       <p>PHP implements a superset of
3162
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3163
   *                       only supports these values when they are nested inside an array or an object.
3164
   *                       </p>
3165
   * @param int   $options [optional] <p>
3166
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3167
   *                       <b>JSON_HEX_TAG</b>,
3168
   *                       <b>JSON_HEX_AMP</b>,
3169
   *                       <b>JSON_HEX_APOS</b>,
3170
   *                       <b>JSON_NUMERIC_CHECK</b>,
3171
   *                       <b>JSON_PRETTY_PRINT</b>,
3172
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3173
   *                       <b>JSON_FORCE_OBJECT</b>,
3174
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3175
   *                       constants is described on
3176
   *                       the JSON constants page.
3177
   *                       </p>
3178
   * @param int   $depth   [optional] <p>
3179
   *                       Set the maximum depth. Must be greater than zero.
3180
   *                       </p>
3181
   *
3182
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3183
   */
3184 2
  public static function json_encode($value, int $options = 0, int $depth = 512): string
  {
    // The value is passed through self::filter() before PHP's encoder sees it.
    $filtered = self::filter($value);

    // NOTE(review): \json_encode() returns false on failure, which the declared
    // "string" return type does not allow - confirm how callers expect a
    // failed encode to surface.
    return \json_encode($filtered, $options, $depth);
  }
3192
3193
  /**
3194
   * Returns the last $n characters of the string.
3195
   *
3196
   * @param string $str      <p>The input string.</p>
3197
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
3198
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3199
   *
3200
   * @return string
3201
   */
3202
  public static function last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A non-positive count yields the empty string; otherwise take the
    // last $n characters by using a negative start offset.
    return ($n <= 0)
        ? ''
        : self::substr($str, -$n, null, $encoding);
  }
3210
3211
  /**
3212
   * Makes string's first char lowercase.
3213
   *
3214
   * @param string $str       <p>The input string</p>
3215
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
3216
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3217
   *
3218
   * @return string <p>The resulting string</p>
3219
   */
3220 7
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; substr() may report false, which
    // is treated as "nothing left".
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = ($tail === false) ? '' : $tail;

    // The first character, lowercased.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3235
3236
  /**
3237
   * alias for "UTF8::lcfirst()"
3238
   *
3239
   * @see UTF8::lcfirst()
3240
   *
3241
   * @param string $word
3242
   * @param string $encoding
3243
   * @param bool   $cleanUtf8
3244
   *
3245
   * @return string
3246
   */
3247 1
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: the work is done by lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3251
3252
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via UTF8::str_to_words() and the chunks are joined again
   * without any glue (NOTE(review): this presumes str_to_words() also returns the
   * separators between words - confirm against its implementation).
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a plain truthiness check would also
    // discard the perfectly valid input "0".
    if ($str === '') {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip empty chunks only; the chunk "0" is real content and has to survive.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3301
3302
  /**
   * Returns the longest run of leading characters shared by $str and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function longestCommonPrefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the prefix can never be longer than the shorter of both strings
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $position = 0;

    while ($position < $limit) {
      $current = self::substr($str, $position, 1, $encoding);

      // stop at the first position where both strings differ
      if ($current != self::substr($otherStr, $position, 1, $encoding)) {
        break;
      }

      $prefix .= $current;
      $position++;
    }

    return $prefix;
  }
3328
3329
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * Uses the classic dynamic-programming table, see
   * http://en.wikipedia.org/wiki/Longest_common_substring_problem
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The longest common substring, or an empty string if there is none.</p>
   */
  public static function longestCommonSubstring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of $otherStr once (1-based, like the table below)
    // instead of calling substr again for every cell of the table.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // position in $str directly behind the best match

    // $table[$i][$j] = length of the common run ending at $str[$i - 1] / $otherStr[$j - 1];
    // the table is pre-filled with 0, so only matching cells need to be written.
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // invariant for the whole inner loop
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar !== $otherChars[$j]) {
          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // strictly greater: on ties the first occurrence wins
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    // substr is guarded against false elsewhere in this class; the cast keeps the
    // declared string return type intact in that case.
    return (string)self::substr(
        $str,
        $end - $len,
        $len,
        $encoding
    );
  }
3383
3384
  /**
   * Returns the longest run of trailing characters shared by $str and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function longestCommonSuffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the suffix can never be longer than the shorter of both strings
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $offset = 1;

    // walk backwards from the end of both strings
    while ($offset <= $limit) {
      $current = self::substr($str, -$offset, 1, $encoding);

      if ($current != self::substr($otherStr, -$offset, 1, $encoding)) {
        break;
      }

      $suffix = $current . $suffix;
      $offset++;
    }

    return $suffix;
  }
3410
3411
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without $chars (INF, null, '' ...) every leading character of the Unicode
   * categories "separator" (\pZ) and "other" (\pC) is removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but a legitimate character to strip (e.g. leading zeros),
    // so it must not fall into the "no characters given" branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = "^[\pZ\pC]+";
    } else {
      // preg_quote() also escapes "-", "]" and "^", which keeps the character class intact
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[{$chars}]+";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
3435
3436
  /**
   * Tests $str against a regular expression.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier,
   * so a literal "/" inside $pattern has to be escaped by the caller.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool <p>Whether or not $str matches the pattern.</p>
   */
  public static function matchesPattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only a match counts as true.
    return (bool)\preg_match('/' . $pattern . '/u', $str);
  }
3452
3453
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string
   *                if the input contains no characters at all.</p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty list (ValueError on PHP 8, false on PHP 7).
    // NOTE(review): assumes codepoints() yields an empty array for empty input - confirm.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
3468
3469
  /**
   * Determines the byte width of the widest character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Largest entry reported by UTF8::chr_size_list(), or 0 if that list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3486
3487
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal mbstring encoding
   * is switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3502
3503 1
  /**
   * Checks whether the "mbstring.func_overload" INI directive has the
   * MB_OVERLOAD_STRING bit set.
   *
   * INFO: INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    // without the constant there is nothing that could be overloaded
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3514
3515
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string
   *                if the input contains no characters at all.</p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty list (ValueError on PHP 8, false on PHP 7).
    // NOTE(review): assumes codepoints() yields an empty array for empty input - confirm.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
3530
3531
  /**
   * Deprecated alias of "UTF8::normalize_encoding()".
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Value handed on as fallback.</p>
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3547
3548
  /**
   * Normalize the encoding-"name" input.
   *
   * Resolution order: empty input => $fallback; "UTF-8" / "UTF8" => "UTF-8";
   * previously resolved names (per-request static cache); names listed in the
   * "encodings" data set (returned unchanged); known aliases from the table below;
   * otherwise the upper-cased input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    // input name => resolved name
    static $resolvedNames = [];

    if (!$encoding) {
      return $fallback;
    }

    // fast path for the by far most common input
    if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
      return 'UTF-8';
    }

    if (isset($resolvedNames[$encoding])) {
      return $resolvedNames[$encoding];
    }

    // lazy-load the list of known encoding names
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
      return $resolvedNames[$encoding] = $encoding;
    }

    $upperName = \strtoupper($encoding);
    // lookup key: only letters, digits and whitespace survive
    $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    $aliases = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // unknown names are handed back upper-cased
    $normalized = !empty($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperName;

    // the cache is keyed by the name exactly as it was passed in
    $resolvedNames[$encoding] = $normalized;

    return $normalized;
  }
3665
3666
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement map comes from the "utf8_msword" data set; its keys are
   * replaced by their values via str_replace().
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    // search / replace lists, built once per request
    static $searchList = null;
    static $replaceList = null;

    if ($str === '') {
      return '';
    }

    if ($searchList === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $searchList = \array_keys(self::$UTF8_MSWORD);
      $replaceList = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($searchList, $replaceList, $str);
  }
3694
3695
  /**
   * Normalize the whitespace.
   *
   * Every entry of the class' whitespace table is replaced by a plain space; unless
   * told otherwise, the entries of the bidi-control table are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one search list per variant: 0 = replace NBSP as well, 1 = keep NBSP
    static $searchLists = [];
    static $bidiControls = null;

    if ($str === '') {
      return '';
    }

    $variant = (int)$keepNonBreakingSpace;

    if (!isset($searchLists[$variant])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $searchLists[$variant] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($searchLists[$variant], ' ', $str);
  }
3737
3738
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per request, keyed by the original input plus the
   * (normalized) encoding.
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $codePointCache = [];

    // the cache key is built from the input as it was passed in
    $originalChr = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // still not UTF-8 after normalization: convert the character to UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $originalChr . $encoding;
    if (isset($codePointCache[$cacheKey]) === true) {
      return $codePointCache[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred: let "intl" do the work; a falsy result falls through to the manual decoding
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $codePointCache[$cacheKey] = $intlCode;
      }
    }

    // manual decoding: look at (up to) the first four bytes, indexed from 1
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all => 0)
      $code = $lead;
    }

    return $codePointCache[$cacheKey] = $code;
  }
3803
3804
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never (re-)places variables in the current scope;
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    // success requires both: no parser failure and a non-empty result
    return $parsed !== false && !empty($result);
  }
3829
3830
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier against an empty string;
   * warnings of an unsupported modifier are silenced on purpose.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3840
3841
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is interpreted in this order: all-digits => decimal code point,
   * hex digits => hexadecimal code point, anything else => code point of the
   * given character.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return string[] <p>Empty if one of the bounds is empty or resolves to code point 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // resolves one bound to its code point
    $toCodePoint = static function ($bound): int {
      if (\ctype_digit((string)$bound)) {
        return (int)$bound;
      }

      if (\ctype_xdigit($bound)) {
        return (int)self::hex_to_int($bound);
      }

      return self::ord($bound);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3887
3888
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // non-standard "%uXXXX" escapes are turned into numeric html entities first
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // decode again and again until a pass does not change the string anymore
    // (or only once, if $multi_decode is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3936
3937
  /**
   * Filters a list of strings; the surviving entries are re-indexed from 0.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Drop entries that are empty after trim().</p>
   * @param int   $removeShortValues <p>If not null: drop entries whose length is less than or equal to this
   *                                 value.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];

    foreach ($strings as $candidate) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true
                 &&
                 \trim($candidate) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $candidate;
    }

    return $kept;
  }
3971
3972
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always evaluated with the "u" modifier; $options are appended to it.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string <p>An empty string if preg_replace() fails.</p>
   */
  public static function regexReplace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // the option set "msr" is mapped to the modifiers "ms"
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback for an empty delimiter
    $wrapper = $delimiter ?: '/';

    $regex = $wrapper . $pattern . $wrapper . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4002
4003
  /**
   * Deprecated alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4018
4019
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the class' BOM table is tried in turn; whenever the (remaining)
   * string starts with one, that many bytes are cut off the front.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // "CP850" makes the helpers work on bytes
      if (self::strpos($str, $bomString, 0, 'CP850') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, 'CP850');
      $str = $remainder === false ? '' : (string)$remainder;
    }

    return $str;
  }
4044
4045
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly consecutive repetitions of an entry of $what is
   * collapsed into a single occurrence, e.g. "a   b" => "a b".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // an empty needle cannot be duplicated
        if ($item === '') {
          continue;
        }

        // The replacement is produced by a callback on purpose: passed as a plain
        // replacement string, an $item such as '$0' or '\0' would be interpreted
        // as a backreference and the duplicates would survive.
        $str = (string)\preg_replace_callback(
            '/(' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4068
4069
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (%00 ... %1F) of these
   *                            characters.</p>
   * @param string $replacement [optional] <p>Replacement for every removed character / sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-escape may be upper or lower case ("%0B" === "%0b"),
      // hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left to replace: removing one sequence can form a new one
    // (e.g. "%%0000" => "%00")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4102
4103
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // mb_substitute_character() only understands a code point or one of the keywords
      // "none" / "long" / "entity" - an arbitrary replacement string is rejected.
      // So invalid sequences are either dropped ("none") or first turned into U+FFFD,
      // which the str_replace() below then swaps for the requested replacement.
      $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the substitute character is a global mbstring setting: remember and restore it
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4152
4153
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. If omitted (or null / false / '' / []),
   *                      all characters of the unicode categories "separator" (\pZ) and "other" (\pC)
   *                      are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Only a missing character list selects the default pattern. A plain truthiness check is not
    // enough here, because the perfectly valid list "0" is falsy in PHP.
    if ($chars === INF || $chars === null || $chars === false || $chars === '' || $chars === []) {
      $pattern = "[\pZ\pC]+\$";
    } else {
      // numbers like 0 are accepted as character list as well
      if (\is_int($chars) || \is_float($chars)) {
        $chars = (string)$chars;
      }

      $chars = \preg_quote($chars, '/');
      $pattern = "[$chars]+\$";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
4177
4178
  /**
   * Build (and cache) a regex fragment that matches any of the given characters.
   *
   * Characters of up to two bytes are quoted and collected in one bracket expression, a "-" is moved
   * to the front of that expression, everything else that is not a single character becomes its own
   * alternation branch.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>Either "[...]" or "(?:[...]|...)".</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $cache = [];

    $cacheKey = $s . $class;
    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    // $parts[0] is the content of the bracket expression, all further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $chunk) {
      if ($chunk === '-') {
        // a leading "-" is a literal in a bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($chunk[2])) {
        // at most two bytes long
        $parts[0] .= \preg_quote($chunk, '/');
        continue;
      }

      if (self::strlen($chunk) === 1) {
        $parts[0] .= $chunk;
        continue;
      }

      $parts[] = $chunk;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $result = (\count($parts) === 1)
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $cache[$cacheKey] = $result;

    return $result;
  }
4226
4227
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of the internal support-array as "key - value" inside of a "<pre>"-block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $output = '<pre>';
    foreach (self::$SUPPORT as $name => $state) {
      $output .= $name . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4242
4243
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars [optional] <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // these two names are passed on as they are, everything else gets normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4272
4273
  /**
   * Replace every run of $tabLength spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that are replaced by one tab. Default: 4</p>
   *
   * @return string <p>The converted string. The input is returned unchanged if $tabLength is less than 1.</p>
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    // There is nothing meaningful to replace for zero or negative lengths,
    // and a negative length must never reach str_repeat().
    if ($tabLength < 1) {
      return $str;
    }

    return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
  }
4283
4284
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "." + "s"-modifier: one match per character, newlines included
      \preg_match_all('/./us', $str, $matches);
      $chars = $matches[0] ?? $chars;
      unset($matches);

    } else {

      // fallback: decode the UTF-8 byte sequences manually

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // a continuation byte has the bit pattern 10xxxxxx
      $isContinuation = static function (string $byte): bool {
        return ($byte & "\xC0") === "\x80";
      };

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {

        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx -> single byte (ASCII)
          $chars[] = $lead;

        } elseif (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx -> two byte sequence, only taken over if the continuation byte is valid
          if ($isContinuation($str[$i + 1])) {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }

        } elseif (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx -> three byte sequence
          if ($isContinuation($str[$i + 1]) && $isContinuation($str[$i + 2])) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx -> four byte sequence
          if (
              $isContinuation($str[$i + 1])
              &&
              $isContinuation($str[$i + 2])
              &&
              $isContinuation($str[$i + 3])
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
        // every other byte is skipped without adding anything to the result
      }
    }

    if ($length > 1) {
      // glue $length characters together for every element of the result
      return \array_map(
          static function (array $chunk): string {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4401
4402
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains $needle.</p>
   */
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    // position 0 is a valid hit, only "false" means "not found"
    return $position !== false;
  }
4422
4423
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains every one of the $needles;
   *              always false for an empty $haystack or an empty list of $needles.</p>
   */
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (!isset($haystack[0]) || empty($needles)) {
      return false;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if (!$found) {
        // one missing needle is enough
        return false;
      }
    }

    return true;
  }
4453
4454
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains at least one of the $needles;
   *              always false for an empty list of $needles.</p>
   */
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    // an empty list never enters the loop and therefore yields false
    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if ($found) {
        return true;
      }
    }

    return false;
  }
4480
4481
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // every check is executed only once, its result tells LE (1) and BE (2) apart
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the loaded data is not guaranteed to be a list -> nothing left to try
    if (!\is_array(self::$ENCODINGS)) {
      return false;
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure, which must not be passed on to a string function
      // (strict types). The strict comparison rejects it, and it also makes hashing unnecessary:
      // the string survived the round trip if, and only if, it is unchanged.
      if ($roundTrip === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
4599
4600
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if $haystack or $needle is empty.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // compare the last bytes of the haystack with the needle
    $needleLength = \strlen($needle);
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
4616
4617
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str ends with one of the $substrings;
   *              always false for an empty list.</p>
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore yields false
    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4641
4642
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string <p>The string, starting with $substring.</p>
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
4659
4660
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string <p>The string, ending with $substring.</p>
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
4676
4677
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if $haystack or $needle is empty.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the tail is cut by the byte-length of the needle and then compared case-insensitively
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4697
4698
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str ends with one of the $substrings;
   *              always false for an empty list.</p>
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    // an empty list never enters the loop and therefore yields false
    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4722
4723
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The new string, or the unchanged input if $index lies behind the end of $str.</p>
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $length = self::strlen($str, $encoding);

    if ($index > $length) {
      return $str;
    }

    // cut the string at $index and glue the substring in between
    $head = self::substr($str, 0, $index, $encoding);
    $tail = self::substr($str, $index, $length, $encoding);

    return $head . $substring . $tail;
  }
4746
4747
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s). They are inserted literally,
   *                       "$1" or "\1" are not treated as back-references.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Every needle becomes a quoted, case-insensitive unicode pattern.
    $patterns = \array_map(
        static function ($needle): string {
          $needle = (string)$needle;

          if ($needle === '') {
            // an empty needle must not match anything: this pattern can never match
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($needle, '/') . '/ui';
        },
        (array)$search
    );

    // preg_replace() expands "$n" and "\n" in the replacement, whereas str_ireplace() inserts the
    // replacement as it is -> escape the two special characters (the backslash first).
    $escape = static function ($value): string {
      return \str_replace(['\\', '$'], ['\\\\', '\\$'], (string)$value);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
4790
4791
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p>Always false if $haystack or $needle is empty.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a hit at the very first position counts
    return self::stripos($haystack, $needle) === 0;
  }
4811
4812
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the $substrings;
   *              always false for an empty $str or an empty list.</p>
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if (!isset($str[0]) || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4840
4841
  /**
   * Limit the number of characters in a string without cutting the last word in two.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Max. number of characters. Default: 100</p>
   * @param string $strAddOn [optional] <p>Appended to the string if it was shortened. Default: '…'</p>
   *
   * @return string <p>The input itself if it is short enough, otherwise the shortened string plus $strAddOn.</p>
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit hits a space -> cut directly in front of it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise cut at the limit and drop the last (maybe incomplete) word
    $cut = (string)self::substr($str, 0, $length);
    $completeWords = \implode(' ', \array_slice(\explode(' ', $cut), 0, -1));

    if ($completeWords !== '') {
      return $completeWords . $strAddOn;
    }

    // there is no complete word to keep -> hard cut
    return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
  }
4877
4878
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                <strong>Returns the padded string</strong>, or the unchanged input if $pad_length is
   *                not positive, smaller than the length of $str, or if $pad_string is empty.
   *                </p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // An empty pad string can not fill anything, and it would be a division by zero below.
    if (!$ps_length) {
      return $str;
    }

    // number of characters that are missing
    $diff = ($pad_length - $str_length);

    // The pad string is repeated often enough to cover the gap and then cut to the exact length.
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // for an odd gap the right side gets the extra character
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2));
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4932
4933
  /**
   * Repeat a string.
   *
   * The string is passed through the filter-method of this class before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4957
4958
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // plain delegation to the native function, $count is filled by reference
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4991
4992
  /**
4993
   * Replace the first "$search"-term with the "$replace"-term.
4994
   *
4995
   * @param string $search
4996
   * @param string $replace
4997
   * @param string $subject
4998
   *
4999
   * @return string
5000
   */
5001 1
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstMatchAt = self::strpos($subject, $search);

    // No occurrence at all: hand the subject back untouched.
    if ($firstMatchAt === false) {
      return $subject;
    }

    // Replace exactly the characters covered by the first match.
    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstMatchAt, $searchLength);
  }
5011
5012
  /**
5013
   * Shuffles all the characters in the string.
5014
   *
5015
   * PS: uses random algorithm which is weak for cryptography purposes
5016
   *
5017
   * @param string $str <p>The input string</p>
5018
   *
5019
   * @return string <p>The shuffled string.</p>
5020
   */
5021 1
  public static function str_shuffle(string $str): string
  {
    // One index per character; the order of these indexes is randomised below.
    $lastIndex = self::strlen($str) - 1;
    $positions = \range(0, $lastIndex);

    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($positions);

    // Rebuild the string by picking the characters in the shuffled order.
    $result = '';
    foreach ($positions as $position) {
      $result .= self::substr($str, $position, 1);
    }

    return $result;
  }
5034
5035
  /**
5036
   * Sort all characters according to code points.
5037
   *
5038
   * @param string $str    <p>A UTF-8 string.</p>
5039
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
5040
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
5041
   *
5042
   * @return string <p>String of sorted characters.</p>
5043
   */
5044 1
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice collapses duplicates: values become keys and back again.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    // Both sort functions keep the keys; only the resulting order is used afterwards.
    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
  }
5060
5061
  /**
5062
   * Split a string into an array.
5063
   *
5064
   * @param string|string[] $str
5065
   * @param int             $len
5066
   *
5067
   * @return string[]
5068
   */
5069 18
  public static function str_split($str, int $len = 1): array
  {
    // Arrays are processed element by element, keeping the original keys.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    // Empty input yields no chunks at all.
    if (!isset($str[0])) {
      return [];
    }

    // Non-positive lengths are passed on to the native function as they are.
    if ($len < 1) {
      return \str_split($str, $len);
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue every $len consecutive clusters together into one chunk.
    $chunks = [];
    $chunkIndex = -1;
    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // Start of a new chunk.
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
5109
5110
  /**
5111
   * Check if the string starts with the given substring.
5112
   *
5113
   * @param string $haystack <p>The string to search in.</p>
5114
   * @param string $needle   <p>The substring to search for.</p>
5115
   *
5116
   * @return bool
5117
   */
5118 2 View Code Duplication
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // An empty haystack or an empty needle never counts as a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Prefix check: the needle has to be found right at offset 0.
    return \strpos($haystack, $needle) === 0;
  }
5130
5131
  /**
5132
   * Returns true if the string begins with any of $substrings, false otherwise.
5133
   *
5134
   * - case-sensitive
5135
   *
5136
   * @param string $str        <p>The input string.</p>
5137
   * @param array  $substrings <p>Substrings to look for.</p>
5138
   *
5139
   * @return bool <p>Whether or not $str starts with $substring.</p>
5140
   */
5141 View Code Duplication
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing can match when either the string or the candidate list is empty.
    if (!isset($str[0]) || empty($substrings)) {
      return false;
    }

    // The first candidate that is a prefix of $str decides the result.
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
5159
5160
  /**
5161
   * Get a binary representation of a specific string.
5162
   *
5163
   * @param string $str <p>The input string.</p>
5164
   *
5165
   * @return string
5166
   */
5167 1
  public static function str_to_binary(string $str): string
  {
    // Convert byte by byte instead of pushing the whole hex dump through
    // base_convert(): that function loses precision as soon as the value no
    // longer fits into a native number, which corrupts the bits of longer strings.
    $bytes = \unpack('C*', $str);
    if ($bytes === false) {
      $bytes = [];
    }

    $bits = '';
    foreach ($bytes as $byte) {
      // Every byte contributes exactly eight binary digits.
      $bits .= \sprintf('%08b', $byte);
    }

    // Keep the previous output format: no leading zeros, and "0" for
    // empty input (or input consisting of NUL bytes only).
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
5173
5174
  /**
5175
   * @param string   $str
5176
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
5177
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
5178
   *
5179
   * @return string[]
5180
   */
5181
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Empty input: either no line at all or a single empty line.
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // Split on exactly one line break at a time ("\r\n", "\r" or "\n").
    // The former pattern "[\r\n]{1,2}" consumed "\n\n" as a single separator and
    // therefore dropped blank lines in LF-only text while keeping them in CRLF text.
    // The "u" modifier is not needed: the pattern is pure ASCII, and without it
    // preg_split() cannot fail on malformed UTF-8 (which would break the array return type).
    $lines = \preg_split('/\r\n|\r|\n/', $str);

    // Without any filter requested the raw list is the result.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $removeEmptyValues,
        $removeShortValues
    );
  }
5209
5210
  /**
5211
   * Convert a string into an array of words.
5212
   *
5213
   * @param string   $str
5214
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
5215
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
5216
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
5217
   *
5218
   * @return string[]
5219
   */
5220 10
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Empty input: either no entry at all or a single empty entry.
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // Turn the extra characters into a regex character class (letters are always included).
    $charList = self::rxClass($charList, '\pL');

    // The capturing group keeps the words themselves in the result,
    // in between the separators that surround them.
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$filterRequested) {
      return $parts;
    }

    return self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );
  }
5250
5251
  /**
5252
   * alias for "UTF8::to_ascii()"
5253
   *
5254
   * @see UTF8::to_ascii()
5255
   *
5256
   * @param string $str
5257
   * @param string $unknown
5258
   * @param bool   $strict
5259
   *
5260
   * @return string
5261
   */
5262 7
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Pure alias: all arguments are forwarded unchanged to to_ascii().
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5266
5267
  /**
5268
   * Counts number of words in the UTF-8 string.
5269
   *
5270
   * @param string $str      <p>The input string.</p>
5271
   * @param int    $format   [optional] <p>
5272
   *                         <strong>0</strong> => return a number of words (default)<br>
5273
   *                         <strong>1</strong> => return an array of words<br>
5274
   *                         <strong>2</strong> => return an array of words with word-offset as key
5275
   *                         </p>
5276
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
5277
   *
5278
   * @return string[]|int <p>The number of words in the string</p>
5279
   */
5280 1
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // str_to_words() returns separators and words alternately:
    // even indexes hold the separators, odd indexes hold the words.
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    // Default format: only the number of words is wanted.
    if ($format !== 1 && $format !== 2) {
      return ($partCount - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      // Plain list of words.
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // Format 2: every word is keyed by its character offset in the input.
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $parts[$i];
      // Skip the word itself plus the separator that follows it.
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
5310
5311
  /**
5312
   * Case-insensitive string comparison.
5313
   *
5314
   * INFO: Case-insensitive version of UTF8::strcmp()
5315
   *
5316
   * @param string $str1
5317
   * @param string $str2
5318
   *
5319
   * @return int <p>
5320
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
5321
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
5322
   *             <strong>0</strong> if they are equal.
5323
   *             </p>
5324
   */
5325 11
  public static function strcasecmp(string $str1, string $str2): int
  {
    // Case-fold both sides first, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5329
5330
  /**
5331
   * alias for "UTF8::strstr()"
5332
   *
5333
   * @see UTF8::strstr()
5334
   *
5335
   * @param string $haystack
5336
   * @param string $needle
5337
   * @param bool   $before_needle
5338
   * @param string $encoding
5339
   * @param bool   $cleanUtf8
5340
   *
5341
   * @return string|false
5342
   */
5343 1
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged to strstr().
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5347
5348
  /**
5349
   * Case-sensitive string comparison.
5350
   *
5351
   * @param string $str1
5352
   * @param string $str2
5353
   *
5354
   * @return int  <p>
5355
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
5356
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
5357
   *              <strong>0</strong> if they are equal.
5358
   *              </p>
5359
   */
5360 14
  public static function strcmp(string $str1, string $str2): int
  {
    // Identical byte sequences are equal; no normalization needed.
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() signals failure (e.g. for malformed input) with false. Passing
    // that on to \strcmp() raises a TypeError under strict_types, so fall back
    // to the raw string for the side that could not be normalized.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
5368
5369
  /**
5370
   * Find length of initial segment not matching mask.
5371
   *
5372
   * @param string $str
5373
   * @param string $charList
5374
   * @param int    $offset
5375
   * @param int    $length
5376
   *
5377
   * @return int|null
5378
   */
5379 15
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // Without a mask there is nothing to measure against.
    if ($charList === '') {
      return null;
    }

    // Restrict the search to the requested window first.
    if ($offset !== 0 || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    if (!isset($str[0])) {
      return null;
    }

    // Lazily capture everything in front of the first character from the mask.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $match)) {
      return self::strlen($match[1]);
    }

    // No masked character present: the whole string is the initial segment.
    return self::strlen($str);
  }
5403
5404
  /**
5405
   * alias for "UTF8::stristr()"
5406
   *
5407
   * @see UTF8::stristr()
5408
   *
5409
   * @param string $haystack
5410
   * @param string $needle
5411
   * @param bool   $before_needle
5412
   * @param string $encoding
5413
   * @param bool   $cleanUtf8
5414
   *
5415
   * @return string|false
5416
   */
5417 1
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged to stristr().
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5421
5422
  /**
5423
   * Create a UTF-8 string from code points.
5424
   *
5425
   * INFO: opposite to UTF8::codepoints()
5426
   *
5427
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
5428
   *
5429
   * @return string <p>UTF-8 encoded string.</p>
5430
   */
5431 2
  public static function string(array $array): string
  {
    // Every code point is handed to UTF8::chr() through array_map();
    // the resulting characters are then joined without a separator.
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
5444
5445
  /**
5446
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5447
   *
5448
   * @param string $str <p>The input string.</p>
5449
   *
5450
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
5451
   */
5452 3
  public static function string_has_bom(string $str): bool
  {
    // Only the BOM byte sequences (the array keys) matter here, not their lengths.
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5462
5463
  /**
5464
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
5465
   *
5466
   * @link http://php.net/manual/en/function.strip-tags.php
5467
   *
5468
   * @param string $str             <p>
5469
   *                                The input string.
5470
   *                                </p>
5471
   * @param string $allowable_tags  [optional] <p>
5472
   *                                You can use the optional second parameter to specify tags which should
5473
   *                                not be stripped.
5474
   *                                </p>
5475
   *                                <p>
5476
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
5477
   *                                can not be changed with allowable_tags.
5478
   *                                </p>
5479
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
5480
   *
5481
   * @return string <p>The stripped string.</p>
5482
   */
5483 2
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Optionally drop invalid UTF-8 before the tags are removed.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
5495
5496
  /**
5497
   * Strip all whitespace characters. This includes tabs and newline
5498
   * characters, as well as multibyte whitespace such as the thin space
5499
   * and ideographic space.
5500
   *
5501
   * @param string $str
5502
   *
5503
   * @return string
5504
   */
5505 12
  public static function strip_whitespace(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Remove every run of whitespace; the cast turns a failed replacement (null) into ''.
    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
5513
5514
  /**
5515
   * Finds position of first occurrence of a string within another, case insensitive.
5516
   *
5517
   * @link http://php.net/manual/en/function.mb-stripos.php
5518
   *
5519
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5520
   * @param string $needle    <p>The string to find in haystack.</p>
5521
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
5522
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5523
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5524
   *
5525
   * @return int|false <p>
5526
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
5527
   *                   or false if needle is not found.
5528
   *                   </p>
5529
   */
5530 10
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // An empty haystack or needle can never produce a position.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Detect the available extensions once, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5562
5563
  /**
5564
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5565
   *
5566
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
5567
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
5568
   * @param bool   $before_needle  [optional] <p>
5569
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
5570
   *                               haystack before the first occurrence of the needle (excluding the needle).
5571
   *                               </p>
5572
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
5573
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5574
   *
5575
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
5576
   */
5577 17
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // An empty haystack or needle can never match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" check would also treat the valid needle "0" as empty
    // and wrongly return the whole haystack.
    if ($needle === '') {
      return $haystack;
    }

    // Detect the available extensions once, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Pure ASCII input can safely use the native byte-wise function.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // Everything from the match position to the end of the haystack.
    return self::substr($haystack, self::strlen($match[1]));
  }
5638
5639
  /**
5640
   * Get the string length, not the byte-length!
5641
   *
5642
   * @link     http://php.net/manual/en/function.mb-strlen.php
5643
   *
5644
   * @param string $str       <p>The string being checked for length.</p>
5645
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5646
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5647
   *
5648
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
5649
   *             character counted as +1)</p>
5650
   */
5651 88
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // The empty string has no characters, whatever the encoding.
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Detect the available extensions once, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "ASCII" and "CP850" are both measured as 8-bit data.
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      // Neither extension that understands foreign encodings is available.
      if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      // Only iconv is available: try it first.
      if (self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    // For pure ASCII the byte count equals the character count.
    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5744
5745
  /**
5746
   * Get string length in byte.
5747
   *
5748
   * @param string $str
5749
   *
5750
   * @return int
5751
   */
5752 70
  public static function strlen_in_byte(string $str): int
  {
    // Make sure the feature flags are populated before they are read, the same
    // way the other methods of this class do before touching self::$SUPPORT.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // Presumably \strlen() is overloaded by mbstring in this setup, so the
      // bytes are counted through an 8-bit encoding instead.
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
5762
5763
  /**
5764
   * Case insensitive string comparisons using a "natural order" algorithm.
5765
   *
5766
   * INFO: natural order version of UTF8::strcasecmp()
5767
   *
5768
   * @param string $str1 <p>The first string.</p>
5769
   * @param string $str2 <p>The second string.</p>
5770
   *
5771
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
5772
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
5773
   *             <strong>0</strong> if they are equal
5774
   */
5775 1
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    // Case-fold both sides first, then compare them in natural order.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5779
5780
  /**
5781
   * String comparisons using a "natural order" algorithm
5782
   *
5783
   * INFO: natural order version of UTF8::strcmp()
5784
   *
5785
   * @link  http://php.net/manual/en/function.strnatcmp.php
5786
   *
5787
   * @param string $str1 <p>The first string.</p>
5788
   * @param string $str2 <p>The second string.</p>
5789
   *
5790
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
5791
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
5792
   *             <strong>0</strong> if they are equal
5793
   */
5794 2
  public static function strnatcmp(string $str1, string $str2): int
  {
    // Identical strings are equal; no folding needed.
    if ($str1 === $str2) {
      return 0;
    }

    // Fold both sides for natural ordering, then let the native function decide.
    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5798
5799
  /**
5800
   * Case-insensitive string comparison of the first n characters.
5801
   *
5802
   * @link  http://php.net/manual/en/function.strncasecmp.php
5803
   *
5804
   * @param string $str1 <p>The first string.</p>
5805
   * @param string $str2 <p>The second string.</p>
5806
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5807
   *
5808
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
5809
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
5810
   *             <strong>0</strong> if they are equal
5811
   */
5812 1
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    // Case-fold both sides first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5816
5817
  /**
5818
   * String comparison of the first n characters.
5819
   *
5820
   * @link  http://php.net/manual/en/function.strncmp.php
5821
   *
5822
   * @param string $str1 <p>The first string.</p>
5823
   * @param string $str2 <p>The second string.</p>
5824
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5825
   *
5826
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
5827
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
5828
   *             <strong>0</strong> if they are equal
5829
   */
5830 2
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // Cut both strings down to their first $len characters; the cast turns a
    // failed substr() (false) into an empty string before the comparison.
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
5837
5838
  /**
5839
   * Search a string for any of a set of characters.
5840
   *
5841
   * @link  http://php.net/manual/en/function.strpbrk.php
5842
   *
5843
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5844
   * @param string $char_list <p>This parameter is case sensitive.</p>
5845
   *
5846
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
5847
   */
5848 1
  public static function strpbrk(string $haystack, string $char_list)
  {
    // Nothing to find in an empty haystack or with an empty character list.
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // Look for the first character that belongs to the list.
    $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit);
    if (!$found) {
      return false;
    }

    // Return the haystack from that character onwards.
    $startAt = \strpos($haystack, $hit[0]);

    return \substr($haystack, $startAt);
  }
5860
5861
  /**
   * Find the position of the first occurrence of a string in a string.
   *
   * The lookup is delegated to the first usable backend (plain "strpos" for CP850,
   * "iconv", "mbstring", "intl") and falls back to a byte-wise search whose
   * result is converted into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: $needle is declared as "string", so the former "(int)$needle === $needle"
    //       code-point conversion could never be reached and was removed

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the haystack: translate it into an
      // absolute character offset, so that the result stays relative to the whole haystack
      // (an offset pointing before the start of the string is clamped to 0)
      $offset = \max(0, (int)self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte position inside the sliced haystack: count the characters
    // in front of it and add the characters that were skipped via $offset
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos));
  }
6000
6001
  /**
   * Returns the portion of haystack around the last occurrence of needle.
   *
   * Thin wrapper around "\mb_strrchr()" with optional encoding normalization and UTF-8 cleanup.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return everything of haystack before the last occurrence of needle<br>
   *                              <b>false</b>: return everything of haystack from the last occurrence of needle
   *                              to the end
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "UTF-8" and "CP850" are passed through as they are, everything else gets normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences are stripped from both inputs before the search
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
6037
6038
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is walked from its last character position down to the first one,
   * and each single-character slice is appended to the result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $result = '';
    for ($index = self::strlen($str) - 1; $index >= 0; --$index) {
      $result .= self::substr($str, $index, 1);
    }

    return $result;
  }
6059
6060
  /**
   * Returns the portion of haystack around the last occurrence of needle, case insensitive.
   *
   * Thin wrapper around "\mb_strrichr()" with optional encoding normalization and UTF-8 cleanup.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return everything of haystack before the last occurrence of needle<br>
   *                               <b>false</b>: return everything of haystack from the last occurrence of needle
   *                               to the end
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "UTF-8" and "CP850" are passed through as they are, everything else gets normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences are stripped from both inputs before the search
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
6095
6096
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * Uses "\mb_strripos()" if mbstring is available, "\grapheme_strripos()" for UTF-8 if intl is available,
   * and otherwise compares upper-cased copies of both strings via UTF8::strrpos().
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so the former "(int)$needle === $needle"
    //       code-point conversion could never be reached and was removed

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: case-sensitive search on upper-cased copies

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
6158
6159
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * Uses "\mb_strrpos()" if mbstring is available, "\grapheme_strrpos()" for UTF-8 if intl is available,
   * and otherwise a byte-wise "\strrpos()" whose result is converted into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative integer needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the native functions below expect an integer offset: with strict types a
    // "null" would be rejected by "\mb_strrpos()" / "\grapheme_strrpos()"
    $offset = $offset ?? 0;

    // INFO: $encoding is declared as "string", so the former "$encoding === true"
    //       fallback for old versions could never be reached and was removed
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters, they are added again to the result below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut off the last |$offset| characters, the result stays relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte position: count the characters in front of it
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
6249
6250
  /**
   * Finds the length (in characters) of the initial segment of a string which consists
   * entirely of characters contained within a given mask.
   *
   * If an offset or a length is given, only that slice of the string is inspected.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional]
   * @param int    $length [optional]
   *
   * @return int <p>0 if the (sliced) string or the mask is empty, or if the string does not start with a mask char.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      // restrict the search to the requested slice, a failed slice counts as empty string
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
6277
6278
  /**
   * Returns the part of haystack starting at the first occurrence of needle (or the part before it).
   *
   * Uses "\mb_strstr()" if mbstring is available, "\grapheme_strstr()" for UTF-8 if intl is available,
   * and otherwise a regular expression based lookup.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or haystack / needle is empty).
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences are stripped from both inputs before the search
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbString = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbString === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbString === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
6345
6346
  /**
   * Unicode transformation for case-less matching.
   *
   * The keys of UTF8::$COMMON_CASE_FOLD are replaced by their values, optionally followed by the
   * replacements from the "caseFolding_full" data set; the result is lower-cased via UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // per-request caches, filled on first use
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6392
6393
  /**
   * Make a string lowercase.
   *
   * Without a language the work is done by "\mb_strtolower()". With a language (and intl available)
   * the "<lang>-Lower" transliterator is used instead, falling back to "Any-Lower" (plus a warning)
   * if that transliterator id is not in the list of supported ids; $encoding is not used on that path.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the conversion
      $str = self::clean($str);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // the common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // generic transliterator as replacement for the unknown language
      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
6446
6447
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) via the intl "Normalizer" and all
   * non-spacing marks (\p{Mn}) are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6459
6460
  /**
   * Make a string uppercase.
   *
   * Without a language the work is done by "\mb_strtoupper()". With a language (and intl available)
   * the "<lang>-Upper" transliterator is used instead, falling back to "Any-Upper" (plus a warning)
   * if that transliterator id is not in the list of supported ids; $encoding is not used on that path.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // generic transliterator as replacement for the unknown language
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method (it previously reported "UTF8::strtolower()")
      \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6512
6513
  /**
   * Translate characters or replace sub-strings.
   *
   * With $to given, $from and $to are split via UTF8::str_split(), cut to the same number of
   * elements and used as a replacement map. Without $to, an array $from is passed to "\strtr()" as
   * replacement map, while a string $from is removed from $str.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // replacing something with itself changes nothing
    if ($from === $to) {
      return $str;
    }

    // INF marks "no $to given"
    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only pairs which exist on both sides are used
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
6558
6559
  /**
   * Return the width of a string.
   *
   * Thin wrapper around "\mb_strwidth()" with optional encoding normalization and UTF-8 cleanup.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // "UTF-8" and "CP850" are passed through as they are, everything else gets normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
6583
6584
  /**
6585
   * Get part of a string.
6586
   *
6587
   * @link http://php.net/manual/en/function.mb-substr.php
6588
   *
6589
   * @param string $str       <p>The string being checked.</p>
6590
   * @param int    $offset    <p>The first position used in str.</p>
6591
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
6592
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6593
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6594
   *
6595
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6596
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6597
   *                      characters long, <b>FALSE</b> will be returned.</p>
6598
   */
6599 75
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Multi-byte aware substr(): returns the part of $str selected by $offset / $length
    // (counted in characters of $encoding), '' for empty results and false if $offset
    // lies behind the end of the string.

    // An empty input or an explicit zero length can only produce an empty string.
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // The character count is only needed to validate a non-zero offset
    // or to fill in an omitted length.
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Offset points exactly behind the last character and no length was given.
    // ($length can only be null or non-zero here, so the strict null check is exact.)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible: the offset lies behind the end of the string.
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is handled by the native byte-wise substr() as long as mbstring does not overload it.
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the requested encoding explicitly, otherwise iconv falls back to its configured default
      $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // pure ASCII: one byte per character, the native function is sufficient
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
6703
6704
  /**
6705
   * Binary safe comparison of two strings from an offset, up to length characters.
6706
   *
6707
   * @param string   $str1               <p>The main string being compared.</p>
6708
   * @param string   $str2               <p>The secondary string being compared.</p>
6709
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
6710
   *                                     counting from the end of the string.</p>
6711
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
6712
   *                                     the length of the str compared to the length of main_str less the offset.</p>
6713
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6714
   *                                     insensitive.</p>
6715
   *
6716
   * @return int <p>
6717
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6718
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6719
   *             <strong>0</strong> if they are equal.
6720
   *             </p>
6721
   */
6722 1
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    // Compares $str1 (optionally reduced to the window given by $offset / $length)
    // with $str2, case sensitive or insensitive.
    $hasWindow = ($offset !== 0 || $length !== null);

    if ($hasWindow) {
      // cut the window out of the main string; a failed cut (false) is treated as ''
      $str1 = (string)self::substr($str1, $offset, $length);

      // shorten the second string to the character count of that window
      $str2 = (string)self::substr($str2, 0, self::strlen($str1));
    }

    return $case_insensitivity === true
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6748
6749
  /**
6750
   * Count the number of substring occurrences.
6751
   *
6752
   * @link  http://php.net/manual/en/function.substr-count.php
6753
   *
6754
   * @param string $haystack   <p>The string to search in.</p>
6755
   * @param string $needle     <p>The substring to search for.</p>
6756
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
6757
   * @param int    $length     [optional] <p>
6758
   *                           The maximum length after the specified offset to search for the
6759
   *                           substring. It outputs a warning if the offset plus the length is
6760
   *                           greater than the haystack length.
6761
   *                           </p>
6762
   * @param string $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
6763
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
6764
   *
6765
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
6766
   */
6767 1
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Counts how often $needle occurs in $haystack (optionally only inside the
    // window given by $offset / $length). Returns false if one of the strings is empty.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset !== 0 || $length !== null) {

      if ($length === null) {
        // measure with the same encoding that is used for the slice below
        $length = self::strlen($haystack, $encoding);
      }

      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window; a failed cut (false) becomes ''
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the literal needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
6831
6832
  /**
6833
   * Returns the number of occurrences of $substring in the given string.
6834
   * By default, the comparison is case-sensitive, but can be made insensitive
6835
   * by setting $caseSensitive to false.
6836
   *
6837
   * @param string $str           <p>The input string.</p>
6838
   * @param string $substring     <p>The substring to search for.</p>
6839
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6840
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
6841
   *
6842
   * @return int
6843
   */
6844
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    // For a case-insensitive count both strings are upper-cased first,
    // afterwards the regular substr_count() does the work.
    if ($caseSensitive === false) {
      $str = self::strtoupper($str, $encoding);
      $substring = self::strtoupper($substring, $encoding);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    // substr_count() may return false (e.g. empty input), which is reported as 0
    return (int)$occurrences;
  }
6853
6854
  /**
6855
   * Removes an prefix ($needle) from start of the string ($haystack), case insensitive.
6856
   *
6857
   * @param string $haystack <p>The string to search in.</p>
6858
   * @param string $needle   <p>The substring to search for.</p>
6859
   *
6860
   * @return string <p>Return the sub-string.</p>
6861
   */
6862 1 View Code Duplication
  public static function substr_ileft(string $haystack, string $needle): string
  {
    // Strips $needle from the beginning of $haystack, ignoring case.
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not start with it
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix; a failed cut (false) becomes ''
    return (string)self::substr($haystack, self::strlen($needle));
  }
6882
6883
  /**
6884
   * Removes an suffix ($needle) from end of the string ($haystack), case insensitive.
6885
   *
6886
   * @param string $haystack <p>The string to search in.</p>
6887
   * @param string $needle   <p>The substring to search for.</p>
6888
   *
6889
   * @return string <p>Return the sub-string.</p>
6890
   */
6891 1 View Code Duplication
  public static function substr_iright(string $haystack, string $needle): string
  {
    // Strips $needle from the end of $haystack, ignoring case.
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix; a failed cut (false) becomes ''
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return (string)self::substr($haystack, 0, $keepLength);
  }
6911
6912
  /**
6913
   * Removes an prefix ($needle) from start of the string ($haystack).
6914
   *
6915
   * @param string $haystack <p>The string to search in.</p>
6916
   * @param string $needle   <p>The substring to search for.</p>
6917
   *
6918
   * @return string <p>Return the sub-string.</p>
6919
   */
6920 1 View Code Duplication
  public static function substr_left(string $haystack, string $needle): string
  {
    // Strips $needle from the beginning of $haystack (case sensitive).
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not start with it
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix; a failed cut (false) becomes ''
    return (string)self::substr($haystack, self::strlen($needle));
  }
6940
6941
  /**
6942
   * Replace text within a portion of a string.
6943
   *
6944
   * source: https://gist.github.com/stemar/8287074
6945
   *
6946
   * @param string|string[] $str              <p>The input string or an array of stings.</p>
6947
   * @param string|string[] $replacement      <p>The replacement string or an array of stings.</p>
6948
   * @param int|int[]       $offset           <p>
6949
   *                                          If start is positive, the replacing will begin at the start'th offset
6950
   *                                          into string.
6951
   *                                          <br><br>
6952
   *                                          If start is negative, the replacing will begin at the start'th character
6953
   *                                          from the end of string.
6954
   *                                          </p>
6955
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
6956
   *                                          portion of string which is to be replaced. If it is negative, it
6957
   *                                          represents the number of characters from the end of string at which to
6958
   *                                          stop replacing. If it is not given, then it will default to strlen(
6959
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6960
   *                                          length is zero then this function will have the effect of inserting
6961
   *                                          replacement into string at the given start offset.</p>
6962
   *
6963
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6964
   */
6965 7
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // Multi-byte aware substr_replace(). An array of strings is processed element by
    // element (with one replacement / offset / length per element), a single string
    // is handled directly below.
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: one entry per input string
      $replacement = \is_array($replacement) === true
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: entries that are not real integers are replaced by 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: null entries become 0, other non-integer entries become the number of strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if ($value === null) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, once per input string
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single string takes only the first replacement (or '' if there is none)
    if (\is_array($replacement) === true) {
      $replacement = \count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // pure ASCII input can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the replacement in
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
7042
7043
  /**
7044
   * Removes an suffix ($needle) from end of the string ($haystack).
7045
   *
7046
   * @param string $haystack <p>The string to search in.</p>
7047
   * @param string $needle   <p>The substring to search for.</p>
7048
   *
7049
   * @return string <p>Return the sub-string.</p>
7050
   */
7051 1 View Code Duplication
  public static function substr_right(string $haystack, string $needle): string
  {
    // Strips $needle from the end of $haystack (case sensitive).
    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if (!isset($needle[0]) || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix; a failed cut (false) becomes ''
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return (string)self::substr($haystack, 0, $keepLength);
  }
7071
7072
  /**
7073
   * Returns a case swapped version of the string.
7074
   *
7075
   * @param string $str       <p>The input string.</p>
7076
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
7077
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7078
   *
7079
   * @return string <p>Each character's case swapped.</p>
7080
   */
7081 1
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Returns $str with the case of each character swapped.
    if (!isset($str[0])) {
      return '';
    }

    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isDefaultEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // optionally clean the input before the case variants are built
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // XOR of lower, upper and original: wherever the original equals the lower-case
    // variant the upper-case one remains, and the other way around
    return (string)($lower ^ $upper ^ $str);
  }
7099
7100
  /**
7101
   * @param string $str
7102
   * @param int    $tabLength
7103
   *
7104
   * @return string
7105
   */
7106
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    // every tab character is replaced by $tabLength space characters
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
7110
7111
  /**
7112
   * Converts the first character of each word in the string to uppercase
7113
   * and all other chars to lowercase.
7114
   *
7115
   * @param string $str      <p>The input string.</p>
7116
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
7117
   *
7118
   * @return string <p>String with all characters of $str being title-cased.</p>
7119
   */
7120
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // "mb_convert_case()" used a polyfill from the "UTF8"-Class
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
7125
7126
  /**
7127
   * alias for "UTF8::to_ascii()"
7128
   *
7129
   * @see        UTF8::to_ascii()
7130
   *
7131
   * @param string $str
7132
   * @param string $subst_chr
7133
   * @param bool   $strict
7134
   *
7135
   * @return string
7136
   *
7137
   * @deprecated <p>use "UTF8::to_ascii()"</p>
7138
   */
7139 7
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    // deprecated alias: all arguments are handed over unchanged to to_ascii()
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
7143
7144
  /**
7145
   * alias for "UTF8::to_iso8859()"
7146
   *
7147
   * @see        UTF8::to_iso8859()
7148
   *
7149
   * @param string|string[] $str
7150
   *
7151
   * @return string|string[]
7152
   *
7153
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
7154
   */
7155 1
  public static function toIso8859($str)
  {
    // deprecated alias: the argument is handed over unchanged to to_iso8859()
    $converted = self::to_iso8859($str);

    return $converted;
  }
7159
7160
  /**
7161
   * alias for "UTF8::to_latin1()"
7162
   *
7163
   * @see        UTF8::to_latin1()
7164
   *
7165
   * @param string|string[] $str
7166
   *
7167
   * @return string|string[]
7168
   *
7169
   * @deprecated <p>use "UTF8::to_latin1()"</p>
7170
   */
7171 1
  public static function toLatin1($str)
  {
    // deprecated alias: the argument is handed over unchanged to to_latin1()
    $converted = self::to_latin1($str);

    return $converted;
  }
7175
7176
  /**
7177
   * alias for "UTF8::to_utf8()"
7178
   *
7179
   * @see        UTF8::to_utf8()
7180
   *
7181
   * @param string|string[] $str
7182
   *
7183
   * @return string|string[]
7184
   *
7185
   * @deprecated <p>use "UTF8::to_utf8()"</p>
7186
   */
7187 1
  public static function toUTF8($str)
  {
    // deprecated alias: the argument is handed over unchanged to to_utf8()
    $converted = self::to_utf8($str);

    return $converted;
  }
7191
7192
  /**
7193
   * Convert a string into ASCII.
7194
   *
7195
   * @param string $str     <p>The input string.</p>
7196
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
7197
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
7198
   *                        performance</p>
7199
   *
7200
   * @return string
7201
   */
7202 21
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Converts $str into ASCII: every non-ASCII character is looked up in the
    // transliteration tables loaded via getData(); characters without an entry
    // are replaced by $unknown.

    // transliteration tables, loaded lazily per "bank" (code point >> 8) and kept between calls
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure,
        // in that case we go on with the cleaned input instead of passing false around
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // reset per character, otherwise a code point decoded for an
      // earlier character could be reused for the current one
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // lead bytes 254 / 255, or nothing could be decoded: unknown character
      if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points each
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // no table for this bank: remember an empty one
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the code point inside its bank
      $newchar = $ord & 255;

      $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return \implode('', $chars);
  }
7351
7352
  /**
7353
   * @param string $str
7354
   *
7355
   * @return bool
7356
   */
7357
  public static function to_boolean(string $str): bool
  {
    // Interprets the usual textual flags (case-insensitive), then numeric strings
    // (true if greater than zero), and finally falls back to "is the trimmed string truthy".
    $key = \strtolower($str);

    // Info: http://php.net/manual/en/filter.filters.validate.php
    if (\in_array($key, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($key, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    if (\is_numeric($str)) {
      return $str + 0 > 0;
    }

    $trimmed = self::trim($str);

    return (bool)$trimmed;
  }
7383
7384
  /**
7385
   * Convert a string into "ISO-8859"-encoding (Latin-1).
7386
   *
7387
   * @param string|string[] $str
7388
   *
7389
   * @return string|string[]
7390
   */
7391 3
  public static function to_iso8859($str)
  {
    // arrays are converted element by element (recursively), the keys are kept
    if (\is_array($str) === true) {
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    // nothing to decode in an empty string
    return isset($str[0]) ? self::utf8_decode($str) : '';
  }
7408
7409
  /**
7410
   * alias for "UTF8::to_iso8859()"
7411
   *
7412
   * @see UTF8::to_iso8859()
7413
   *
7414
   * @param string|string[] $str
7415
   *
7416
   * @return string|string[]
7417
   */
7418 1
  public static function to_latin1($str)
  {
    // alias: the argument is handed over unchanged to to_iso8859()
    $converted = self::to_iso8859($str);

    return $converted;
  }
7422
7423
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Bytes that already form a plausible 2-, 3- or 4-byte UTF-8 sequence are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "UTF8::to_utf8_convert()" (WINDOWS-1252 / ISO-8859).</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded afterwards.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are processed element by element, the keys are kept
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      if ($lead >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes does this lead byte announce?
        if ($lead <= "\xDF") {
          $expected = 1; // looks like 2 bytes UTF8
        } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
          $expected = 2; // looks like 3 bytes UTF8
        } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
          $expected = 3; // looks like 4 bytes UTF8
        } else {
          $expected = 0; // doesn't look like UTF8, but should be converted
        }

        // collect the continuation bytes (0x80 - 0xBF), bytes behind the end of the string count as "\x00"
        $sequence = $lead;
        $looksLikeUtf8 = $expected > 0;
        for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; $offset++) {
          $next = $pos + $offset >= $byteCount ? "\x00" : $str[$pos + $offset];

          if ($next >= "\x80" && $next <= "\xBF") {
            $sequence .= $next;
          } else {
            $looksLikeUtf8 = false;
          }
        }

        if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
          $result .= $sequence;
          $pos += $expected;
        } else { // not valid UTF8 - convert only the lead byte, the next bytes are inspected on their own
          $result .= self::to_utf8_convert($lead);
        }

      } elseif (($lead & "\xC0") === "\x80") { // stray continuation byte - needs conversion

        $result .= self::to_utf8_convert($lead);

      } else { // 7-bit byte - it doesn't need conversion
        $result .= $lead;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, but only on request
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
7531
7532
  /**
   * Convert one single non-UTF-8 byte into its UTF-8 representation.
   *
   * The byte is looked up in the WINDOWS-1252 special-case table first; if there is no entry,
   * a 2-byte UTF-8 sequence is built from the byte value.
   *
   * @param string $int <p>
   *                    The raw byte (a one-char string). Despite the parameter name this is not an
   *                    integer: "UTF8::to_utf8()" passes "$str[$i]" and the byte is used with string
   *                    bit-operators below.
   *                    </p>
   *
   * @return string <p>The UTF-8 encoded character.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Build the sequence "110000xx 10xxxxxx" by hand.
    // The upper two bits are taken with an integer shift: the former "$ordC1 / 64" produced a
    // fractional float that was used as array key, which is truncated implicitly and is
    // deprecated since PHP 8.1 ("Implicit conversion from float to int loses precision").
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
7565
7566
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without "$chars" (INF, or any empty value except the string "0") all characters of the
   * unicode categories "Z" (separators) and "C" (control / other) are stripped.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is falsy in PHP, but it is a perfectly valid list of characters to strip,
    // so it must not fall back to the default pattern: trim('0010', '0') has to return '1'.
    $useDefaultPattern = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // quote the chars, so that they are safe inside the character class and the "/"-delimiter
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
7595
7596
  /**
   * Makes string's first char uppercase, the rest of the string is left untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // clean the input first, so that the character offsets used below are reliable
      $str = self::clean($str);
    }

    // everything behind the first character ("false" from substr() means: there is nothing)
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    // the first character
    $head = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($head, $encoding, $cleanUtf8) . $tail;
  }
7626
7627
  /**
   * Makes the first char of a word uppercase, this is only an alias for "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The input word.</p>
   * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $upperCased = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCased;
  }
7642
7643
  /**
   * Uppercase for all words in the string.
   *
   * Pure ASCII input without "$exceptions" and without "$charlist" is delegated to the native
   * "\ucwords()"; everything else is split via "UTF8::str_to_words()", and every word that is
   * not listed in "$exceptions" is passed through "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // only a really empty string is "empty": the string "0" is falsy, but it is valid input
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // compare against '' instead of casting to bool, otherwise a charlist of "0" would be ignored
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty chunks only, a chunk like "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
7711
7712
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // one pass: to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8,
    // with "$multi_decode" the pass is repeated until the string does not change anymore
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
7760
7761
  /**
7762
   * Return a array with "urlencoded"-win1252 -> UTF-8
7763
   *
7764
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
7765
   *
7766
   * @return string[]
7767
   */
7768 1
  public static function urldecode_fix_win1252_chars(): array
7769
  {
7770
    return [
7771 1
        '%20' => ' ',
7772
        '%21' => '!',
7773
        '%22' => '"',
7774
        '%23' => '#',
7775
        '%24' => '$',
7776
        '%25' => '%',
7777
        '%26' => '&',
7778
        '%27' => "'",
7779
        '%28' => '(',
7780
        '%29' => ')',
7781
        '%2A' => '*',
7782
        '%2B' => '+',
7783
        '%2C' => ',',
7784
        '%2D' => '-',
7785
        '%2E' => '.',
7786
        '%2F' => '/',
7787
        '%30' => '0',
7788
        '%31' => '1',
7789
        '%32' => '2',
7790
        '%33' => '3',
7791
        '%34' => '4',
7792
        '%35' => '5',
7793
        '%36' => '6',
7794
        '%37' => '7',
7795
        '%38' => '8',
7796
        '%39' => '9',
7797
        '%3A' => ':',
7798
        '%3B' => ';',
7799
        '%3C' => '<',
7800
        '%3D' => '=',
7801
        '%3E' => '>',
7802
        '%3F' => '?',
7803
        '%40' => '@',
7804
        '%41' => 'A',
7805
        '%42' => 'B',
7806
        '%43' => 'C',
7807
        '%44' => 'D',
7808
        '%45' => 'E',
7809
        '%46' => 'F',
7810
        '%47' => 'G',
7811
        '%48' => 'H',
7812
        '%49' => 'I',
7813
        '%4A' => 'J',
7814
        '%4B' => 'K',
7815
        '%4C' => 'L',
7816
        '%4D' => 'M',
7817
        '%4E' => 'N',
7818
        '%4F' => 'O',
7819
        '%50' => 'P',
7820
        '%51' => 'Q',
7821
        '%52' => 'R',
7822
        '%53' => 'S',
7823
        '%54' => 'T',
7824
        '%55' => 'U',
7825
        '%56' => 'V',
7826
        '%57' => 'W',
7827
        '%58' => 'X',
7828
        '%59' => 'Y',
7829
        '%5A' => 'Z',
7830
        '%5B' => '[',
7831
        '%5C' => '\\',
7832
        '%5D' => ']',
7833
        '%5E' => '^',
7834
        '%5F' => '_',
7835
        '%60' => '`',
7836
        '%61' => 'a',
7837
        '%62' => 'b',
7838
        '%63' => 'c',
7839
        '%64' => 'd',
7840
        '%65' => 'e',
7841
        '%66' => 'f',
7842
        '%67' => 'g',
7843
        '%68' => 'h',
7844
        '%69' => 'i',
7845
        '%6A' => 'j',
7846
        '%6B' => 'k',
7847
        '%6C' => 'l',
7848
        '%6D' => 'm',
7849
        '%6E' => 'n',
7850
        '%6F' => 'o',
7851
        '%70' => 'p',
7852
        '%71' => 'q',
7853
        '%72' => 'r',
7854
        '%73' => 's',
7855
        '%74' => 't',
7856
        '%75' => 'u',
7857
        '%76' => 'v',
7858
        '%77' => 'w',
7859
        '%78' => 'x',
7860
        '%79' => 'y',
7861
        '%7A' => 'z',
7862
        '%7B' => '{',
7863
        '%7C' => '|',
7864
        '%7D' => '}',
7865
        '%7E' => '~',
7866
        '%7F' => '',
7867
        '%80' => '`',
7868
        '%81' => '',
7869
        '%82' => '‚',
7870
        '%83' => 'ƒ',
7871
        '%84' => '„',
7872
        '%85' => '…',
7873
        '%86' => '†',
7874
        '%87' => '‡',
7875
        '%88' => 'ˆ',
7876
        '%89' => '‰',
7877
        '%8A' => 'Š',
7878
        '%8B' => '‹',
7879
        '%8C' => 'Œ',
7880
        '%8D' => '',
7881
        '%8E' => 'Ž',
7882
        '%8F' => '',
7883
        '%90' => '',
7884
        '%91' => '‘',
7885
        '%92' => '’',
7886
        '%93' => '“',
7887
        '%94' => '”',
7888
        '%95' => '•',
7889
        '%96' => '–',
7890
        '%97' => '—',
7891
        '%98' => '˜',
7892
        '%99' => '™',
7893
        '%9A' => 'š',
7894
        '%9B' => '›',
7895
        '%9C' => 'œ',
7896
        '%9D' => '',
7897
        '%9E' => 'ž',
7898
        '%9F' => 'Ÿ',
7899
        '%A0' => '',
7900
        '%A1' => '¡',
7901
        '%A2' => '¢',
7902
        '%A3' => '£',
7903
        '%A4' => '¤',
7904
        '%A5' => '¥',
7905
        '%A6' => '¦',
7906
        '%A7' => '§',
7907
        '%A8' => '¨',
7908
        '%A9' => '©',
7909
        '%AA' => 'ª',
7910
        '%AB' => '«',
7911
        '%AC' => '¬',
7912
        '%AD' => '',
7913
        '%AE' => '®',
7914
        '%AF' => '¯',
7915
        '%B0' => '°',
7916
        '%B1' => '±',
7917
        '%B2' => '²',
7918
        '%B3' => '³',
7919
        '%B4' => '´',
7920
        '%B5' => 'µ',
7921
        '%B6' => '¶',
7922
        '%B7' => '·',
7923
        '%B8' => '¸',
7924
        '%B9' => '¹',
7925
        '%BA' => 'º',
7926
        '%BB' => '»',
7927
        '%BC' => '¼',
7928
        '%BD' => '½',
7929
        '%BE' => '¾',
7930
        '%BF' => '¿',
7931
        '%C0' => 'À',
7932
        '%C1' => 'Á',
7933
        '%C2' => 'Â',
7934
        '%C3' => 'Ã',
7935
        '%C4' => 'Ä',
7936
        '%C5' => 'Å',
7937
        '%C6' => 'Æ',
7938
        '%C7' => 'Ç',
7939
        '%C8' => 'È',
7940
        '%C9' => 'É',
7941
        '%CA' => 'Ê',
7942
        '%CB' => 'Ë',
7943
        '%CC' => 'Ì',
7944
        '%CD' => 'Í',
7945
        '%CE' => 'Î',
7946
        '%CF' => 'Ï',
7947
        '%D0' => 'Ð',
7948
        '%D1' => 'Ñ',
7949
        '%D2' => 'Ò',
7950
        '%D3' => 'Ó',
7951
        '%D4' => 'Ô',
7952
        '%D5' => 'Õ',
7953
        '%D6' => 'Ö',
7954
        '%D7' => '×',
7955
        '%D8' => 'Ø',
7956
        '%D9' => 'Ù',
7957
        '%DA' => 'Ú',
7958
        '%DB' => 'Û',
7959
        '%DC' => 'Ü',
7960
        '%DD' => 'Ý',
7961
        '%DE' => 'Þ',
7962
        '%DF' => 'ß',
7963
        '%E0' => 'à',
7964
        '%E1' => 'á',
7965
        '%E2' => 'â',
7966
        '%E3' => 'ã',
7967
        '%E4' => 'ä',
7968
        '%E5' => 'å',
7969
        '%E6' => 'æ',
7970
        '%E7' => 'ç',
7971
        '%E8' => 'è',
7972
        '%E9' => 'é',
7973
        '%EA' => 'ê',
7974
        '%EB' => 'ë',
7975
        '%EC' => 'ì',
7976
        '%ED' => 'í',
7977
        '%EE' => 'î',
7978
        '%EF' => 'ï',
7979
        '%F0' => 'ð',
7980
        '%F1' => 'ñ',
7981
        '%F2' => 'ò',
7982
        '%F3' => 'ó',
7983
        '%F4' => 'ô',
7984
        '%F5' => 'õ',
7985
        '%F6' => 'ö',
7986
        '%F7' => '÷',
7987
        '%F8' => 'ø',
7988
        '%F9' => 'ù',
7989
        '%FA' => 'ú',
7990
        '%FB' => 'û',
7991
        '%FC' => 'ü',
7992
        '%FD' => 'ý',
7993
        '%FE' => 'þ',
7994
        '%FF' => 'ÿ',
7995
    ];
7996
  }
7997
7998
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Characters that cannot be represented in a single byte (code point >= 256, every 3- and
   * 4-byte sequence and a truncated 2-byte sequence at the end of the input) are replaced by "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the (table-replaced) input is returned unchanged whenever
   *                              the decoded result is not shorter than it.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // search / replace lists, built once per request from the win1252 table
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): "UTF8::utf8_encode()" replaces keys by values of the very same table, too;
    // for opposite conversions one of the two directions looks suspicious - verify against the data file.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // The string is rewritten in place: "$i" is the read position, "$j" the write position.
    // Every sequence shrinks to one byte, so "$j" never overtakes "$i".
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence at the end of the input: there is no continuation byte to read,
            // so do not touch the (non-existing) offset "$len"
            $str[$j] = $noCharFound;
            break;
          }

          // 5 bits from the lead byte + 6 bits from the continuation byte
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // 4-byte sequence: skip one byte more, then fall through to the 3-byte handling
          ++$i;
        case "\xE0":
          // 3-byte sequence: not representable in one byte
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // every other byte is copied as it is
          $str[$j] = $str[$i];
      }
    }

    // cut off the left-over bytes behind the write position
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
8079
8080
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The string is encoded via the native "\utf8_encode()"; if the result contains the byte
   * "\xC2", the entries of the win1252 table are replaced afterwards (keys by values).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, an empty string for empty input or if the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte there is nothing to fix
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists, built once per request from the win1252 table
    static $search = null;
    static $replace = null;

    if ($search === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $search = \array_keys(self::$WIN1252_TO_UTF8);
      $replace = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($search, $replace, $encoded);
  }
8120
8121
  /**
   * Fix utf8-win1252 chars, this only forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
8134
8135
  /**
   * Returns the class-internal table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[] <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
8151
8152
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string has more words than allowed,
   * it is cut after the last allowed word, right-trimmed and "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>An empty string for empty input or a limit below 1.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // leading whitespace + at most "$limit" words, each with its trailing whitespace
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);
    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    // no match, or the match already covers the whole string
    return $str;
  }
8183
8184
  /**
   * Wraps a string to a given number of characters
   *
   * The work is delegated to the native "\wordwrap()", which runs on a single-byte "mask" of the
   * input (one mask char per UTF-8 character); the break positions found in the mask are then
   * applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars: the real characters (an existing break counts as one entry)
    // $mask:  " " for a space, "#" for an existing break, "?" for everything else
    $chars = [];
    $mask = '';

    foreach (\explode($break, $str) as $lineIndex => $line) {

      if ($lineIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0; // next entry of $chars that was not consumed yet
    $markerPos = -1; // position of the current "#" in the wrapped mask
    $maskPos = -1; // read position in the wrapped mask
    $mask = \wordwrap($mask, $width, '#', $cut);

    while (false !== ($markerPos = self::strpos($mask, '#', $markerPos + 1))) {

      // copy the characters in front of the marker
      for (++$maskPos; $maskPos < $markerPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break (or the space) that sits at the wrap position is replaced by the break below
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $chars);
  }
8248
8249
  /**
   * Returns the class-internal array of Unicode White Space characters.
   *
   * @return string[] <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8258
8259
}
8260