Completed
Push — master ( d2fcb6...e30965 )
by Lars
14:03
created

UTF8::strtolower()   D

Complexity

Conditions 9
Paths 29

Size

Total Lines 40
Code Lines 19

Duplication

Lines 40
Ratio 100 %

Code Coverage

Tests 15
CRAP Score 9.7555

Importance

Changes 0
Metric Value
dl 40
loc 40
ccs 15
cts 19
cp 0.7895
rs 4.909
c 0
b 0
f 0
cc 9
eloc 19
nc 29
nop 4
crap 9.7555
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 16
  public function __construct()
  {
    // Creating an instance makes sure the environment detection has run;
    // checkForSupport() itself skips the work when it was already done.
    UTF8::checkForSupport();
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219
   * @return string <p>Single Multi-Byte character.</p>
220
   */
221 3
  public static function access(string $str, int $pos): string
  {
    // Neither an empty string nor a negative position can address a character.
    if ($pos < 0 || !isset($str[0])) {
      return '';
    }

    // Cut out exactly one (multi-byte) character at the requested position.
    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string <p>The output string that contains BOM.</p>
242
   */
243 1
  public static function add_bom_to_string(string $str): string
  {
    // A string that already starts with a BOM is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
253
   * Changes all keys in an array.
254
   *
255
   * @param array $array <p>The array to work on</p>
256
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
257
   *                     or <strong>CASE_LOWER</strong> (default)</p>
258
   *
259
   * @return array <p>An array with its keys lower or uppercased.</p>
260
   */
261 1
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Every value other than CASE_UPPER (including unknown ones) means "lower".
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $name => $item) {
      $newName = $toUpper ? self::strtoupper($name) : self::strtolower($name);

      // Keys that become equal after the conversion overwrite each other;
      // the last one wins.
      $converted[$newName] = $item;
    }

    return $converted;
  }
284
285
  /**
286
   * Convert binary into a string.
287
   *
288
   * @param mixed $bin 1|0
289
   *
290
   * @return string
291
   */
292 1
  public static function binary_to_str($bin): string
  {
    // Only a non-empty string can carry a bit sequence; anything else maps to ''.
    if (!\is_string($bin) || !isset($bin[0])) {
      return '';
    }

    // Characters that are not binary digits are ignored.
    $bits = (string)\preg_replace('/[^01]/', '', $bin);

    // The input is treated as one number, so leading zeros carry no
    // information; an all-zero input yields an empty string.
    $bits = \ltrim($bits, '0');
    if ($bits === '') {
      return '';
    }

    // Left-pad to whole bytes. Decoding byte by byte (instead of going through
    // base_convert() + pack('H*')) keeps the high nibble of values such as
    // "1010" (0x0A) in place and does not lose precision on long inputs.
    $missingBits = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missingBits) . $bits;

    $str = '';
    foreach (\str_split($bits, 8) as $byteBits) {
      $str .= \chr((int)\bindec($byteBits));
    }

    return $str;
  }
305
306
  /**
307
   * Returns the UTF-8 Byte Order Mark Character.
308
   *
309
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
310
   *
311
   * @return string UTF-8 Byte Order Mark
312
   */
313 2
  public static function bom(): string
  {
    // The three bytes EF BB BF form the UTF-8 byte order mark.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
317
318
  /**
319
   * @alias of UTF8::chr_map()
320
   *
321
   * @see   UTF8::chr_map()
322
   *
323
   * @param string|array $callback
324
   * @param string       $str
325
   *
326
   * @return array
327
   */
328 1
  public static function callback($callback, string $str): array
  {
    // Pure alias: both arguments are forwarded to chr_map() unchanged.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
332
333
  /**
334
   * This method will auto-detect your server environment for UTF-8 support.
335
   *
336
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
337
   */
338 19
  public static function checkForSupport()
  {
    // The detection result is kept in self::$SUPPORT, so one run is enough.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // The list of transliterator ids is only available with "intl";
    // without it an empty list is stored.
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
369
370
  /**
371
   * Generates a UTF-8 encoded character from the given code point.
372
   *
373
   * INFO: opposite to UTF8::ord()
374
   *
375
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
376
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
377
   *
378
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
379
   */
380 10
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    // Only integers and numeric strings can name a code point. Normalizing to
    // int here matters for the IntlChar path below: IntlChar::chr() interprets
    // a string argument as a character, not as a number.
    if (\is_int($code_point) === false) {
      if (\is_numeric($code_point) === false) {
        // e.g. empty input
        return null;
      }

      $code_point = (int)$code_point;
    }

    // Unicode has no negative code points and ends at U+10FFFF.
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Results are memoized per code point + target encoding.
    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $chr = \IntlChar::chr($code_point);

      // IntlChar::chr() reports failure with null; pass that on instead of
      // feeding it into mb_convert_encoding().
      if ($chr === null) {
        return null;
      }

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    // Fallback: build the UTF-8 byte sequence by hand from the byte table.
    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    // Code points up to 0x7F were already handled above.
    if ($code_point <= 0x7FF) {
      // 110xxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
461
462
  /**
463
   * Applies callback to all characters of a string.
464
   *
465
   * @param string|array $callback <p>The callback function.</p>
466
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
467
   *
468 1
   * @return array <p>The outcome of callback.</p>
469
   */
470 1
  public static function chr_map($callback, string $str): array
  {
    // Break the string apart with split() and run the callback on every piece.
    return \array_map($callback, self::split($str));
  }
476
477
  /**
478
   * Generates an array of byte length of each character of a Unicode string.
479
   *
480
   * 1 byte => U+0000  - U+007F
481
   * 2 byte => U+0080  - U+07FF
482
   * 3 byte => U+0800  - U+FFFF
483
   * 4 byte => U+10000 - U+10FFFF
484
   *
485
   * @param string $str <p>The original unicode string.</p>
486
   *
487 4
   * @return array <p>An array of byte lengths of each character.</p>
488
   */
489 4
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // self::$SUPPORT is read below, so make sure it has been populated
    // (same guard as in chr() and encode()).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // With mbstring function overloading active, the length is taken via
      // UTF8::strlen() in an 8-bit encoding instead of the plain strlen().
      return \array_map(
          static function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    // Plain strlen() gives the length in bytes of each piece.
    return \array_map(
        static function ($data) {
          return \strlen($data);
        },
        $strSplit
    );
  }
513 2
514
  /**
515 2
   * Get a decimal code representation of a specific character.
516
   *
517
   * @param string $char <p>The input character.</p>
518 2
   *
519
   * @return int
520 2
   */
521 2
  public static function chr_to_decimal(string $char): int
  {
    $lead = self::ord($char[0]);

    // 0xxxxxxx: the lead byte already is the result.
    if (($lead & 0x80) === 0) {
      return $lead;
    }

    // The lead byte announces the sequence length; its marker bits are
    // stripped so that only payload bits remain. A lead byte matching none of
    // the patterns is kept as it is, with a length of one.
    $length = 1;
    $value = $lead;
    if (($lead & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value = $lead & ~0xc0;
    } elseif (($lead & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value = $lead & ~0xe0;
    } elseif (($lead & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value = $lead & ~0xf0;
    }

    // 10xxxxxx: every following byte contributes six more bits.
    for ($offset = 1; $offset < $length; ++$offset) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
552
553
  /**
554 1
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
555 1
   *
556
   * @param string $char <p>The input character</p>
557
   * @param string $pfix [optional]
558 1
   *
559
   * @return string <p>The code point encoded as U+xxxx</p>
560
   */
561
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // The literal entity "&#0;" is passed on to ord() as an empty string.
    $subject = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($subject);

    return self::int_to_hex($codePoint, $pfix);
  }
573
574
  /**
575
   * alias for "UTF8::chr_to_decimal()"
576
   *
577
   * @see UTF8::chr_to_decimal()
578
   *
579
   * @param string $chr
580
   *
581
   * @return int
582
   */
583
  public static function chr_to_int(string $chr): int
  {
    // Pure alias: the work is done by chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
587
588
  /**
589
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
590
   *
591
   * @param string $body     <p>The original string to be split.</p>
592
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
593
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
594
   *
595
   * @return string <p>The chunked string</p>
596
   */
597
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    // split() produces the chunks; $end is placed between them by implode(),
    // so nothing is appended after the last chunk.
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
601
602
  /**
603
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
604
   *
605
   * @param string $str                           <p>The string to be sanitized.</p>
606
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
607
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
608
   *                                              whitespace.</p>
609 64
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
610
   *                                              e.g.: "…"
611
   *                                              => "..."</p>
612
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
613
   *                                              combination with
614 64
   *                                              $normalize_whitespace</p>
615
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
616
   *                                              mark e.g.: "�"</p>
617
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove invisible
618
   *                                              characters e.g.: "\0"</p>
619
   *
620
   * @return string <p>Clean UTF-8 encoded string.</p>
621
   */
622
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; stray bytes match one of
    // the other two groups and are dropped, because only "$1" is written back.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    // The optional steps run in a fixed order, each on the previous result.
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
662
663
  /**
664 24
   * Clean up a string, so that only printable UTF-8 chars remain at the end + fix UTF-8 encoding.
665
   *
666
   * @param string $str <p>The input string.</p>
667
   *
668
   * @return string
669
   */
670
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    return self::clean(
        $fixed,
        true,  // remove the BOM
        true,  // normalize whitespace chars ...
        false, // leave MS Word chars alone
        true,  // ... but keep non-breaking-spaces
        true,  // remove the diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
696
697 7
  /**
698 7
   * Accepts a string or a array of strings and returns an array of Unicode code points.
699
   *
700
   * INFO: opposite to UTF8::string()
701 7
   *
702
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
703 7
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
704
   *                                    default, code points will be returned as integers.</p>
705
   *
706 7
   * @return array <p>The array of code points.</p>
707
   */
708
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A string is split into pieces first; an array is taken as it is.
    $chars = \is_string($arg) ? self::split($arg) : $arg;

    // Every entry is converted with ord() ...
    $points = \array_map([self::class, 'ord'], $chars);

    // ... and, when requested, further with int_to_hex().
    if ($u_style) {
      $points = \array_map([self::class, 'int_to_hex'], $points);
    }

    return $points;
  }
734
735
  /**
736
   * Returns count of characters used in a string.
737
   *
738
   * @param string $str       <p>The input string.</p>
739
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
740
   *
741
   * @return array <p>An associative array of Character as keys and
742
   *               their count as values.</p>
743 5
   */
744
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // Split into pieces of length one, then tally how often each one occurs.
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
748
749
  /**
750
   * Converts a int-value into an UTF-8 character.
751
   *
752
   * @param mixed $int
753
   *
754
   * @return string
755
   */
756
  public static function decimal_to_chr($int): string
  {
    // Build the numeric entity for the value and let the entity decoder
    // turn it into the character.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
760
761 14
  /**
762
   * Encode a string with a new charset-encoding.
763 14
   *
764 6
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
765
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
766
   *
767 14
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
768 2
   * @param string $str      <p>The input string</p>
769
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
770
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
771 14
   *
772
   * @return string
773
   */
774
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Without a string or a target encoding there is nothing to convert.
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Not forced and already detected as the target encoding: leave it alone.
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // to_utf8() / to_iso8859() are used when the conversion is forced or the
    // detected source is one of these three encodings.
    $useDedicatedConverter = $force === true
                             ||
                             \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($useDedicatedConverter) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    // Every other target encoding goes through mb_convert_encoding().
    $isOtherTarget = !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if ($isOtherTarget && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // An empty / falsy conversion result falls back to the unchanged input.
    return $strEncoded ?: $str;
  }
847
848
  /**
849
   * Reads entire file into a string.
850
   *
851
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
852
   *
853
   * @link http://php.net/manual/en/function.file-get-contents.php
854
   *
855
   * @param string        $filename         <p>
856
   *                                        Name of the file to read.
857
   *                                        </p>
858
   * @param bool          $use_include_path [optional] <p>
859
   *                                        Prior to PHP 5, this parameter is called
860
   *                                        use_include_path and is a bool.
861
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
862
   *                                        to trigger include path
863
   *                                        search.
864
   *                                        </p>
865
   * @param resource|null $context          [optional] <p>
866
   *                                        A valid context resource created with
867
   *                                        stream_context_create. If you don't need to use a
868
   *                                        custom context, you can skip this parameter by &null;.
869 6
   *                                        </p>
870
   * @param int|null      $offset           [optional] <p>
871
   *                                        The offset where the reading starts.
872 6
   *                                        </p>
873
   * @param int|null      $maxLength        [optional] <p>
874 6
   *                                        Maximum length of data read. The default is to read until end
875 5
   *                                        of file is reached.
876
   *                                        </p>
877
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
878
   *
879 5
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
880
   *                                        images or pdf, because they used non default utf-8 chars.</p>
881
   *
882
   * @return string|false <p>The function returns the read data or false on failure.</p>
883
   */
884
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
    // alters names containing quotes or tags; kept because callers may rely on it.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a default context (carrying the HTTP timeout) when the
    // caller did not supply one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed on when one was given.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // Binary data that is neither UTF-16 nor UTF-32 is returned untouched.
      $isPlainBinary = self::is_binary($data, true) === true
                       &&
                       self::is_utf16($data) === false
                       &&
                       self::is_utf32($data) === false;

      if ($isPlainBinary === false) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
934
935
  /**
936
   * Checks if a file starts with BOM (Byte Order Mark) character.
937
   *
938
   * @param string $file_path <p>Path to a valid file.</p>
939
   *
940
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
941 9
   */
942
  public static function file_has_bom(string $file_path): bool
  {
    // Read the raw file content; \file_get_contents() returns false when the
    // file cannot be read, which must not be passed on as if it were a string.
    $fileContent = \file_get_contents($file_path);
    if ($fileContent === false) {
      throw new \RuntimeException('file_get_contents() returned false for: ' . $file_path);
    }

    return self::string_has_bom($fileContent);
  }
946
947 3
  /**
948
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
949 3
   *
950 9
   * @param mixed  $var
951 2
   * @param int    $normalization_form
952 2
   * @param string $leading_combining
953
   *
954 2
   * @return mixed
955 9
   */
956
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    // Arrays and objects are filtered element by element (recursively),
    // strings are normalized, every other type is returned unchanged.
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (\strpos($var, "\r") !== false) {
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Pure ASCII needs no normalization at all.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string was already in the requested form.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // An empty / false result means normalization did not work,
      // so fall back to a forced UTF-8 encode of the input.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1010
1011
  /**
1012
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1013
   *
1014
   * Gets a specific external variable by name and optionally filters it
1015
   *
1016
   * @link  http://php.net/manual/en/function.filter-input.php
1017
   *
1018
   * @param int    $type          <p>
1019
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1020
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1021
   *                              <b>INPUT_ENV</b>.
1022
   *                              </p>
1023
   * @param string $variable_name <p>
1024
   *                              Name of a variable to get.
1025
   *                              </p>
1026
   * @param int    $filter        [optional] <p>
1027
   *                              The ID of the filter to apply. The
1028
   *                              manual page lists the available filters.
1029
   *                              </p>
1030
   * @param mixed  $options       [optional] <p>
1031
   *                              Associative array of options or bitwise disjunction of flags. If filter
1032
   *                              accepts options, flags can be provided in "flags" field of array.
1033
   *                              </p>
1034
   *
1035
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1036
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1037
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1038
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1039
   * @since 5.2.0
1040
   */
1041 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a 4th argument;
    // the native result is then run through self::filter().
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1051
1052
  /**
1053
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1054
   *
1055
   * Gets external variables and optionally filters them
1056
   *
1057
   * @link  http://php.net/manual/en/function.filter-input-array.php
1058
   *
1059
   * @param int   $type       <p>
1060
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1061
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1062
   *                          <b>INPUT_ENV</b>.
1063
   *                          </p>
1064
   * @param mixed $definition [optional] <p>
1065
   *                          An array defining the arguments. A valid key is a string
1066
   *                          containing a variable name and a valid value is either a filter type, or an array
1067
   *                          optionally specifying the filter, flags and options. If the value is an
1068
   *                          array, valid keys are filter which specifies the
1069
   *                          filter type,
1070
   *                          flags which specifies any flags that apply to the
1071
   *                          filter, and options which specifies any options that
1072
   *                          apply to the filter. See the example below for a better understanding.
1073
   *                          </p>
1074
   *                          <p>
1075
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1076
   *                          input array are filtered by this filter.
1077
   *                          </p>
1078
   * @param bool  $add_empty  [optional] <p>
1079
   *                          Add missing keys as <b>NULL</b> to the return value.
1080
   *                          </p>
1081
   *
1082
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1083
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1084
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1085
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1086
   * fails.
1087
   * @since 5.2.0
1088
   */
1089 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise the
    // definition and the add-empty flag are forwarded as given.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1099
1100
  /**
1101
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1102
   *
1103
   * Filters a variable with a specified filter
1104
   *
1105
   * @link  http://php.net/manual/en/function.filter-var.php
1106
   *
1107
   * @param mixed $variable <p>
1108
   *                        Value to filter.
1109
   *                        </p>
1110
   * @param int   $filter   [optional] <p>
1111
   *                        The ID of the filter to apply. The
1112
   *                        manual page lists the available filters.
1113
   *                        </p>
1114
   * @param mixed $options  [optional] <p>
1115
   *                        Associative array of options or bitwise disjunction of flags. If filter
1116
   *                        accepts options, flags can be provided in "flags" field of array. For
1117
   *                        the "callback" filter, callable type should be passed. The
1118
   *                        callback must accept one argument, the value to be filtered, and return
1119
   *                        the value after filtering/sanitizing it.
1120
   *                        </p>
1121
   *                        <p>
1122
   *                        <code>
1123
   *                        // for filters that accept options, use this format
1124
   *                        $options = array(
1125
   *                        'options' => array(
1126
   *                        'default' => 3, // value to return if the filter fails
1127
   *                        // other options here
1128
   *                        'min_range' => 0
1129
   *                        ),
1130
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1131
   *                        );
1132
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1133
   *                        // for filter that only accept flags, you can pass them directly
1134
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1135
   *                        // for filter that only accept flags, you can also pass as an array
1136
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1137
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1138
   *                        // callback validate filter
1139
   *                        function foo($value)
1140
   *                        {
1141
   *                        // Expected format: Surname, GivenNames
1142
   *                        if (strpos($value, ", ") === false) return false;
1143
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1144 1
   *                        $empty = (empty($surname) || empty($givennames));
1145
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1146 1
   *                        if ($empty || $notstrings) {
1147 1
   *                        return false;
1148
   *                        } else {
1149 1
   *                        return $value;
1150
   *                        }
1151
   *                        }
1152 1
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1153
   *                        </code>
1154
   *                        </p>
1155
   *
1156
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1157
   * @since 5.2.0
1158
   */
1159 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a 3rd argument;
    // the native result is then run through self::filter().
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1169
1170
  /**
1171
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1172
   *
1173
   * Gets multiple variables and optionally filters them
1174
   *
1175
   * @link  http://php.net/manual/en/function.filter-var-array.php
1176
   *
1177
   * @param array $data       <p>
1178
   *                          An array with string keys containing the data to filter.
1179
   *                          </p>
1180
   * @param mixed $definition [optional] <p>
1181
   *                          An array defining the arguments. A valid key is a string
1182
   *                          containing a variable name and a valid value is either a
1183
   *                          filter type, or an
1184
   *                          array optionally specifying the filter, flags and options.
1185
   *                          If the value is an array, valid keys are filter
1186
   *                          which specifies the filter type,
1187
   *                          flags which specifies any flags that apply to the
1188
   *                          filter, and options which specifies any options that
1189 1
   *                          apply to the filter. See the example below for a better understanding.
1190
   *                          </p>
1191 1
   *                          <p>
1192 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1193
   *                          input array are filtered by this filter.
1194 1
   *                          </p>
1195
   * @param bool  $add_empty  [optional] <p>
1196
   *                          Add missing keys as <b>NULL</b> to the return value.
1197 1
   *                          </p>
1198
   *
1199
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1200
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1201
   * the variable is not set.
1202
   * @since 5.2.0
1203
   */
1204 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise the
    // definition and the add-empty flag are forwarded as given.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1214
1215
  /**
1216
   * Check if the number of unicode characters is not more than the specified integer.
1217
   *
1218
   * @param string $str      The original string to be checked.
1219
   * @param int    $box_size The size in number of chars to be checked against string.
1220
   *
1221
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1222
   */
1223
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Compare the length reported by self::strlen() with the box size.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1227
1228 29
  /**
1229 2
   * Try to fix simple broken UTF-8 strings.
1230
   *
1231
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1232 29
   *
1233 29
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1234
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1235 29
   * See: http://en.wikipedia.org/wiki/Windows-1252
1236
   *
1237 1
   * @param string $str <p>The input string</p>
1238 1
   *
1239
   * @return string
1240
   */
1241 1 View Code Duplication
  public static function fix_simple_utf8(string $str): string
  {
    // Replaces every key of the "utf8_fix" data table found in $str with
    // its mapped value; the key/value lists are built once per request.
    if ($str === '') {
      return '';
    }

    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      // Lazily load the replacement table, then split it into the two
      // parallel lists that \str_replace() expects.
      self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

      $searchCache = \array_keys(self::$BROKEN_UTF8_FIX);
      $replaceCache = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
1262
1263 1
  /**
1264
   * Fix a double (or multiple) encoded UTF8 string.
1265
   *
1266 1
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1267 1
   *
1268 1
   * @return string|string[] <p>Will return the fixed input-"array" or
1269 1
   *                         the fixed input-"string".</p>
1270 1
   */
1271
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively).
    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        $str[$k] = self::fix_utf8($v);
      }

      return $str;
    }

    // utf8_decode() is called with a string argument below; under strict_types
    // a non-string scalar (e.g. an int inside a mixed array) would raise a
    // TypeError, so normalize the input to a string first.
    $str = (string)$str;

    // Decode and re-encode until the string no longer changes, which undoes
    // double (or multiple) encoding.
    $last = '';
    while ($last !== $str) {
      $last = $str;
      $str = self::to_utf8(
          self::utf8_decode($str, true)
      );
    }

    return $str;
  }
1291 1
1292
  /**
1293
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1294
   *
1295 1
   * @param string $char
1296
   *
1297
   * @return string <p>'RTL' or 'LTR'</p>
1298
   */
1299 1
  public static function getCharDirection(string $char): string
  {
    // Determines 'RTL' / 'LTR' for one character: first via IntlChar (when
    // available and conclusive), otherwise via a built-in code point table.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the table's overall bounds is left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Single right-to-left code points (compared strictly).
    $rtlCodePoints = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    if (\in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    // Inclusive right-to-left code point ranges: [first, last].
    $rtlRanges = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1413
1414 1
  /**
1415
   * Get data from "/data/*.php".
1416
   *
1417
   * @param string $file
1418
   *
1419
   * @return bool|string|array|int <p>Will return false on error.</p>
1420
   */
1421
  private static function getData(string $file)
  {
    // Resolve the data file next to this class and hand back whatever the
    // included PHP file returns; a missing file yields false.
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1431
1432 19
  /**
1433 2
   * Check for php-support.
1434
   *
1435
   * @param string|null $key
1436 18
   *
1437 1
   * @return mixed <p>Return the full support-"array", if $key === null<br>
1438
   *               return bool-value, if $key is used and available<br>
1439
   *               otherwise return null</p>
1440 17
   */
1441
  public static function getSupportInfo(string $key = null)
  {
    // Make sure the support table has been populated once.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // No key: return the whole table; otherwise the single entry,
    // or null when that entry is not set.
    return $key === null
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
1457
1458
  /**
1459
   * alias for "UTF8::string_has_bom()"
1460
   *
1461
   * @see        UTF8::string_has_bom()
1462
   *
1463
   * @param string $str
1464
   *
1465
   * @return bool
1466 2
   *
1467
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
1468 2
   */
1469
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: simply delegates to string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1473
1474
  /**
1475
   * Converts a hexadecimal-value into an UTF-8 character.
1476
   *
1477
   * @param string $hexdec <p>The hexadecimal value.</p>
1478
   *
1479
   * @return string|false <p>One single UTF-8 character.</p>
1480 1
   */
1481
  public static function hex_to_chr(string $hexdec)
  {
    // Convert the hexadecimal notation to its decimal value first,
    // then let decimal_to_chr() build the character.
    $decimal = \hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
1485
1486 1
  /**
1487 1
   * Converts hexadecimal U+xxxx code point representation to integer.
1488
   *
1489
   * INFO: opposite to UTF8::int_to_hex()
1490 1
   *
1491
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
1492
   *
1493
   * @return int|false <p>The code point, or false on failure.</p>
1494
   */
1495
  public static function hex_to_int(string $hexDec)
  {
    // Parses "\uXXXX", "U+XXXX" or plain "XXXX" (4-6 hex digits, case-insensitive)
    // into the integer code point; anything else yields false.
    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hexadecimal digits are accepted: with a wider character class
    // (a-z) \intval(..., 16) would silently stop at the first invalid letter
    // and return a wrong code point instead of signalling failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1507
1508
  /**
1509
   * alias for "UTF8::html_entity_decode()"
1510
   *
1511
   * @see UTF8::html_entity_decode()
1512
   *
1513
   * @param string $str
1514
   * @param int    $flags
1515
   * @param string $encoding
1516
   *
1517
   * @return string
1518
   */
1519
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Alias: delegates unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1523 1
1524
  /**
1525
   * Converts a UTF-8 string to a series of HTML numbered entities.
1526 2
   *
1527 1
   * INFO: opposite to UTF8::html_decode()
1528
   *
1529
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1530
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1531 2
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1532
   *
1533 2
   * @return string <p>HTML numbered entities.</p>
1534 2
   */
1535 1
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    // Encodes the characters of $str as numeric HTML entities, via mbstring
    // when available and via a per-character fallback otherwise.
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the conversion range starts after the ASCII block.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap must consist of groups of exactly four integers
      // (start, end, offset, mask); a stray fifth element makes PHP 8
      // throw a ValueError.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode character by character.
    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1570
1571
  /**
1572
   * UTF-8 version of html_entity_decode()
1573
   *
1574
   * The reason we are not using html_entity_decode() by itself is because
1575
   * while it is not technically correct to leave out the semicolon
1576
   * at the end of an entity most browsers will still interpret the entity
1577
   * correctly. html_entity_decode() does not convert entities without
1578
   * semicolons, so we are left with our own little solution here. Bummer.
1579
   *
1580
   * Convert all HTML entities to their applicable characters
1581
   *
1582
   * INFO: opposite to UTF8::html_encode()
1583
   *
1584
   * @link http://php.net/manual/en/function.html-entity-decode.php
1585
   *
1586
   * @param string $str      <p>
1587
   *                         The input string.
1588
   *                         </p>
1589
   * @param int    $flags    [optional] <p>
1590
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1591
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1592
   *                         <table>
1593
   *                         Available <i>flags</i> constants
1594
   *                         <tr valign="top">
1595
   *                         <td>Constant Name</td>
1596
   *                         <td>Description</td>
1597
   *                         </tr>
1598
   *                         <tr valign="top">
1599
   *                         <td><b>ENT_COMPAT</b></td>
1600
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1601
   *                         </tr>
1602
   *                         <tr valign="top">
1603
   *                         <td><b>ENT_QUOTES</b></td>
1604
   *                         <td>Will convert both double and single quotes.</td>
1605
   *                         </tr>
1606
   *                         <tr valign="top">
1607
   *                         <td><b>ENT_NOQUOTES</b></td>
1608
   *                         <td>Will leave both double and single quotes unconverted.</td>
1609
   *                         </tr>
1610
   *                         <tr valign="top">
1611
   *                         <td><b>ENT_HTML401</b></td>
1612
   *                         <td>
1613
   *                         Handle code as HTML 4.01.
1614
   *                         </td>
1615
   *                         </tr>
1616
   *                         <tr valign="top">
1617
   *                         <td><b>ENT_XML1</b></td>
1618
   *                         <td>
1619
   *                         Handle code as XML 1.
1620
   *                         </td>
1621
   *                         </tr>
1622
   *                         <tr valign="top">
1623
   *                         <td><b>ENT_XHTML</b></td>
1624
   *                         <td>
1625 17
   *                         Handle code as XHTML.
1626
   *                         </td>
1627 17
   *                         </tr>
1628 6
   *                         <tr valign="top">
1629
   *                         <td><b>ENT_HTML5</b></td>
1630
   *                         <td>
1631 17
   *                         Handle code as HTML 5.
1632 10
   *                         </td>
1633
   *                         </tr>
1634
   *                         </table>
1635
   *                         </p>
1636 16
   * @param string $encoding [optional] <p>Encoding to use.</p>
1637
   *
1638
   * @return string <p>The decoded string.</p>
1639 16
   */
1640
  public static function html_entity_decode(string $str, ?int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Empty input: nothing to decode.
    if (!isset($str[0])) {
      return '';
    }

    // Strings shorter than four bytes are returned unchanged (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Fast path: without an ampersand, or without both "&#" and ";", the string is returned as-is.
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // No flags given: decode both quote styles and use the HTML5 entity table.
    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Decode repeatedly until a pass changes nothing, so nested encodings
    // such as "&amp;lt;" are fully resolved.
    do {
      $str_compare = $str;

      // Decimal entities are converted via mbstring, except when the result is a
      // quote character: those are left for html_entity_decode() so that $flags decides.
      // preg_replace_callback() returns null on a PCRE error; keep the current
      // string in that case instead of collapsing it to an empty string.
      // NOTE(review): the 'HTML-ENTITIES' pseudo-encoding is deprecated in recent
      // PHP versions - confirm the supported PHP range before relying on it.
      $str = \preg_replace_callback(
          "/&#\d{2,6};/",
          static function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      ) ?? $str;

      // Append the missing semicolon to numeric entities written without one
      // (e.g. "&#65" or "&#x41"). A null result (PCRE error) must not reach
      // html_entity_decode(), which only accepts a string under strict_types.
      $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str) ?? $str;

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode(
          $withSemicolons,
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
1710
1711
  /**
1712
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1713
   *
1714
   * @link http://php.net/manual/en/function.htmlentities.php
1715
   *
1716
   * @param string $str           <p>
1717
   *                              The input string.
1718
   *                              </p>
1719
   * @param int    $flags         [optional] <p>
1720
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1721
   *                              invalid code unit sequences and the used document type. The default is
1722
   *                              ENT_COMPAT | ENT_HTML401.
1723
   *                              <table>
1724
   *                              Available <i>flags</i> constants
1725
   *                              <tr valign="top">
1726
   *                              <td>Constant Name</td>
1727
   *                              <td>Description</td>
1728
   *                              </tr>
1729
   *                              <tr valign="top">
1730
   *                              <td><b>ENT_COMPAT</b></td>
1731
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1732
   *                              </tr>
1733
   *                              <tr valign="top">
1734
   *                              <td><b>ENT_QUOTES</b></td>
1735
   *                              <td>Will convert both double and single quotes.</td>
1736
   *                              </tr>
1737
   *                              <tr valign="top">
1738
   *                              <td><b>ENT_NOQUOTES</b></td>
1739
   *                              <td>Will leave both double and single quotes unconverted.</td>
1740
   *                              </tr>
1741
   *                              <tr valign="top">
1742
   *                              <td><b>ENT_IGNORE</b></td>
1743
   *                              <td>
1744
   *                              Silently discard invalid code unit sequences instead of returning
1745
   *                              an empty string. Using this flag is discouraged as it
1746
   *                              may have security implications.
1747
   *                              </td>
1748
   *                              </tr>
1749
   *                              <tr valign="top">
1750
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1751
   *                              <td>
1752
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1753
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1754
   *                              </td>
1755
   *                              </tr>
1756
   *                              <tr valign="top">
1757
   *                              <td><b>ENT_DISALLOWED</b></td>
1758
   *                              <td>
1759
   *                              Replace invalid code points for the given document type with a
1760
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1761
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1762
   *                              instance, to ensure the well-formedness of XML documents with
1763
   *                              embedded external content.
1764
   *                              </td>
1765
   *                              </tr>
1766
   *                              <tr valign="top">
1767
   *                              <td><b>ENT_HTML401</b></td>
1768
   *                              <td>
1769
   *                              Handle code as HTML 4.01.
1770
   *                              </td>
1771
   *                              </tr>
1772
   *                              <tr valign="top">
1773
   *                              <td><b>ENT_XML1</b></td>
1774
   *                              <td>
1775
   *                              Handle code as XML 1.
1776
   *                              </td>
1777
   *                              </tr>
1778
   *                              <tr valign="top">
1779
   *                              <td><b>ENT_XHTML</b></td>
1780
   *                              <td>
1781
   *                              Handle code as XHTML.
1782
   *                              </td>
1783
   *                              </tr>
1784
   *                              <tr valign="top">
1785
   *                              <td><b>ENT_HTML5</b></td>
1786
   *                              <td>
1787
   *                              Handle code as HTML 5.
1788
   *                              </td>
1789
   *                              </tr>
1790
   *                              </table>
1791
   *                              </p>
1792
   * @param string $encoding      [optional] <p>
1793
   *                              Like <b>htmlspecialchars</b>,
1794
   *                              <b>htmlentities</b> takes an optional third argument
1795
   *                              <i>encoding</i> which defines encoding used in
1796
   *                              conversion.
1797 2
   *                              Although this argument is technically optional, you are highly
1798
   *                              encouraged to specify the correct value for your code.
1799 2
   *                              </p>
1800 1
   * @param bool   $double_encode [optional] <p>
1801
   *                              When <i>double_encode</i> is turned off PHP will not
1802
   *                              encode existing html entities. The default is to convert everything.
1803 2
   *                              </p>
1804
   *
1805
   *
1806
   * @return string the encoded string.
1807
   * </p>
1808
   * <p>
1809
   * If the input <i>string</i> contains an invalid code unit
1810
   * sequence within the given <i>encoding</i> an empty string
1811
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1812
   * <b>ENT_SUBSTITUTE</b> flags are set.
1813 2
   */
1814
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are used verbatim; any other name goes through normalize_encoding() first.
    $needsNormalization = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The native htmlentities() leaves backslashes untouched, so every backslash
    // is turned into its numeric entity (&#92;) afterwards.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // Final pass through html_encode() with its second argument set to true.
    return self::html_encode($encoded, true, $encoding);
  }
1834
1835
  /**
1836
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1837
   *
1838
   * INFO: Take a look at "UTF8::htmlentities()"
1839
   *
1840
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1841
   *
1842
   * @param string $str           <p>
1843
   *                              The string being converted.
1844
   *                              </p>
1845
   * @param int    $flags         [optional] <p>
1846
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1847
   *                              invalid code unit sequences and the used document type. The default is
1848
   *                              ENT_COMPAT | ENT_HTML401.
1849
   *                              <table>
1850
   *                              Available <i>flags</i> constants
1851
   *                              <tr valign="top">
1852
   *                              <td>Constant Name</td>
1853
   *                              <td>Description</td>
1854
   *                              </tr>
1855
   *                              <tr valign="top">
1856
   *                              <td><b>ENT_COMPAT</b></td>
1857
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1858
   *                              </tr>
1859
   *                              <tr valign="top">
1860
   *                              <td><b>ENT_QUOTES</b></td>
1861
   *                              <td>Will convert both double and single quotes.</td>
1862
   *                              </tr>
1863
   *                              <tr valign="top">
1864
   *                              <td><b>ENT_NOQUOTES</b></td>
1865
   *                              <td>Will leave both double and single quotes unconverted.</td>
1866
   *                              </tr>
1867
   *                              <tr valign="top">
1868
   *                              <td><b>ENT_IGNORE</b></td>
1869
   *                              <td>
1870
   *                              Silently discard invalid code unit sequences instead of returning
1871
   *                              an empty string. Using this flag is discouraged as it
1872
   *                              may have security implications.
1873
   *                              </td>
1874
   *                              </tr>
1875
   *                              <tr valign="top">
1876
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1877
   *                              <td>
1878
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1879
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1880
   *                              </td>
1881
   *                              </tr>
1882
   *                              <tr valign="top">
1883
   *                              <td><b>ENT_DISALLOWED</b></td>
1884
   *                              <td>
1885
   *                              Replace invalid code points for the given document type with a
1886
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1887
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1888
   *                              instance, to ensure the well-formedness of XML documents with
1889
   *                              embedded external content.
1890
   *                              </td>
1891
   *                              </tr>
1892
   *                              <tr valign="top">
1893
   *                              <td><b>ENT_HTML401</b></td>
1894
   *                              <td>
1895
   *                              Handle code as HTML 4.01.
1896
   *                              </td>
1897
   *                              </tr>
1898
   *                              <tr valign="top">
1899
   *                              <td><b>ENT_XML1</b></td>
1900
   *                              <td>
1901
   *                              Handle code as XML 1.
1902
   *                              </td>
1903
   *                              </tr>
1904
   *                              <tr valign="top">
1905
   *                              <td><b>ENT_XHTML</b></td>
1906
   *                              <td>
1907
   *                              Handle code as XHTML.
1908
   *                              </td>
1909
   *                              </tr>
1910
   *                              <tr valign="top">
1911
   *                              <td><b>ENT_HTML5</b></td>
1912
   *                              <td>
1913
   *                              Handle code as HTML 5.
1914
   *                              </td>
1915
   *                              </tr>
1916
   *                              </table>
1917
   *                              </p>
1918
   * @param string $encoding      [optional] <p>
1919
   *                              Defines encoding used in conversion.
1920
   *                              </p>
1921
   *                              <p>
1922
   *                              For the purposes of this function, the encodings
1923
   *                              ISO-8859-1, ISO-8859-15,
1924
   *                              UTF-8, cp866,
1925
   *                              cp1251, cp1252, and
1926
   *                              KOI8-R are effectively equivalent, provided the
1927
   *                              <i>string</i> itself is valid for the encoding, as
1928
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1929
   *                              the same positions in all of these encodings.
1930
   *                              </p>
1931
   * @param bool   $double_encode [optional] <p>
1932
   *                              When <i>double_encode</i> is turned off PHP will not
1933
   *                              encode existing html entities, the default is to convert everything.
1934
   *                              </p>
1935
   *
1936
   * @return string The converted string.
1937
   * </p>
1938
   * <p>
1939
   * If the input <i>string</i> contains an invalid code unit
1940
   * sequence within the given <i>encoding</i> an empty string
1941
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1942
   * <b>ENT_SUBSTITUTE</b> flags are set.
1943
   */
1944 View Code Duplication
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are passed on verbatim; every other name is run through normalize_encoding().
    $usedVerbatim = ($encoding === 'UTF-8' || $encoding === 'CP850');
    $charset = $usedVerbatim ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return \htmlspecialchars($str, $flags, $charset, $double_encode);
  }
1952
1953
  /**
1954
   * Checks whether iconv is available on the server.
1955
   *
1956
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1957
   */
1958
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so its result is returned directly.
    return \extension_loaded('iconv');
  }
1962
1963
  /**
1964
   * alias for "UTF8::decimal_to_chr()"
1965
   *
1966
   * @see UTF8::decimal_to_chr()
1967
   *
1968
   * @param mixed $int
1969
   *
1970
   * @return string
1971
   */
1972
  public static function int_to_chr($int): string
  {
    // Pure alias: the argument is forwarded untouched to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
1976
1977
  /**
1978
   * Converts Integer to hexadecimal U+xxxx code point representation.
1979
   *
1980
   * INFO: opposite to UTF8::hex_to_int()
1981
   *
1982
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
1983
   * @param string $pfix [optional]
1984
   *
1985
   * @return string <p>The code point, or empty string on failure.</p>
1986
   */
1987
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad with zeros to at least four hex digits; longer values pass through unchanged.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
1995
1996
  /**
1997
   * Checks whether intl-char is available on the server.
1998
   *
1999
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2000
   */
2001
  public static function intlChar_loaded(): bool
  {
    // Availability is detected by the presence of the IntlChar class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2005
2006
  /**
2007
   * Checks whether intl is available on the server.
2008
   *
2009
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2010
   */
2011
  public static function intl_loaded(): bool
  {
    // Reports whether the "intl" extension is loaded in the running PHP process.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2015
2016
  /**
2017
   * alias for "UTF8::is_ascii()"
2018
   *
2019
   * @see        UTF8::is_ascii()
2020
   *
2021
   * @param string $str
2022
   *
2023
   * @return boolean
2024
   *
2025
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2026
   */
2027
  public static function isAscii(string $str): bool
  {
    // Deprecated alias: forwards to is_ascii() and returns its result unchanged.
    $result = self::is_ascii($str);

    return $result;
  }
2031
2032
  /**
2033
   * alias for "UTF8::is_base64()"
2034
   *
2035
   * @see        UTF8::is_base64()
2036
   *
2037
   * @param string $str
2038
   *
2039
   * @return bool
2040
   *
2041
   * @deprecated <p>use "UTF8::is_base64()"</p>
2042
   */
2043
  public static function isBase64(string $str): bool
  {
    // Deprecated alias: forwards to is_base64() and returns its result unchanged.
    $result = self::is_base64($str);

    return $result;
  }
2047
2048
  /**
2049
   * alias for "UTF8::is_binary()"
2050
   *
2051
   * @see        UTF8::is_binary()
2052
   *
2053
   * @param mixed $str
2054
   * @param bool  $strict
2055
   *
2056
   * @return bool
2057
   *
2058
   * @deprecated <p>use "UTF8::is_binary()"</p>
2059
   */
2060
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated alias: both arguments are handed to is_binary() as received.
    $result = self::is_binary($str, $strict);

    return $result;
  }
2064
2065
  /**
2066
   * alias for "UTF8::is_bom()"
2067
   *
2068
   * @see        UTF8::is_bom()
2069
   *
2070
   * @param string $utf8_chr
2071
   *
2072
   * @return boolean
2073
   *
2074
   * @deprecated <p>use "UTF8::is_bom()"</p>
2075
   */
2076
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated alias: forwards to is_bom() and returns its result unchanged.
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2080
2081
  /**
2082
   * alias for "UTF8::is_html()"
2083
   *
2084
   * @see        UTF8::is_html()
2085
   *
2086
   * @param string $str
2087
   *
2088
   * @return boolean
2089
   *
2090
   * @deprecated <p>use "UTF8::is_html()"</p>
2091
   */
2092
  public static function isHtml(string $str): bool
  {
    // Deprecated alias: forwards to is_html() and returns its result unchanged.
    $result = self::is_html($str);

    return $result;
  }
2096
2097
  /**
2098
   * alias for "UTF8::is_json()"
2099
   *
2100
   * @see        UTF8::is_json()
2101
   *
2102
   * @param string $str
2103
   *
2104
   * @return bool
2105
   *
2106
   * @deprecated <p>use "UTF8::is_json()"</p>
2107
   */
2108
  public static function isJson(string $str): bool
  {
    // Deprecated alias: forwards to is_json() and returns its result unchanged.
    $result = self::is_json($str);

    return $result;
  }
2112
2113
  /**
2114
   * alias for "UTF8::is_utf16()"
2115
   *
2116
   * @see        UTF8::is_utf16()
2117
   *
2118
   * @param string $str
2119
   *
2120
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2121
   *
2122
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2123
   */
2124
  public static function isUtf16(string $str)
  {
    // Deprecated alias: forwards to is_utf16(); the result (false, 1 or 2) is returned as-is.
    $result = self::is_utf16($str);

    return $result;
  }
2128
2129
  /**
2130
   * alias for "UTF8::is_utf32()"
2131
   *
2132
   * @see        UTF8::is_utf32()
2133
   *
2134
   * @param string $str
2135
   *
2136
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2137
   *
2138
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2139
   */
2140
  public static function isUtf32(string $str)
  {
    // Deprecated alias: forwards to is_utf32(); the result (false, 1 or 2) is returned as-is.
    $result = self::is_utf32($str);

    return $result;
  }
2144
2145
  /**
2146
   * alias for "UTF8::is_utf8()"
2147
   *
2148
   * @see        UTF8::is_utf8()
2149
   *
2150
   * @param string $str
2151
   * @param bool   $strict
2152
   *
2153
   * @return bool
2154
   *
2155
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2156
   */
2157
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated alias: both arguments are handed to is_utf8() as received.
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2161
2162
  /**
2163
   * Checks if a string is 7 bit ASCII.
2164
   *
2165
   * @param string $str <p>The string to check.</p>
2166
   *
2167
   * @return bool <p>
2168
   *              <strong>true</strong> if it is ASCII<br>
2169
   *              <strong>false</strong> otherwise
2170
   *              </p>
2171
   */
2172
  public static function is_ascii(string $str): bool
  {
    // An empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Accepted bytes: \x09, \x10, \x13, \x0A, \x0D and the printable range \x20-\x7E.
    // NOTE(review): \x10 and \x13 look like decimal 10/13 (LF/CR) written as hex;
    // confirm the intent before changing the character class.
    $hasOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasOtherByte;
  }
2180
2181
  /**
2182
   * Returns true if the string is base64 encoded, false otherwise.
2183
   *
2184
   * @param string $str <p>The input string.</p>
2185
   *
2186
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2187
   */
2188
  public static function is_base64(string $str): bool
  {
    // Strict mode: base64_decode() returns false when $str contains characters
    // outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Decoding failed, or the input was empty (an empty string is not treated as base64).
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Compare explicitly instead of by truthiness: a decoded payload of "0"
    // (input 'MA==') is falsy in PHP but is perfectly valid base64.
    // Re-encoding must reproduce the input byte for byte.
    return \base64_encode($decoded) === $str;
  }
2194
2195
  /**
2196
   * Check if the input is binary (heuristic; this looks like a hack).
2197
   *
2198
   * @param mixed $input
2199
   * @param bool  $strict
2200
   *
2201
   * @return bool
2202
   */
2203
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // A string consisting only of the characters "0" and "1" is reported as binary.
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    // More than 30% NUL bytes: reported as binary regardless of $strict.
    $nullBytes = \substr_count($data, "\x0");
    if (($nullBytes / \strlen($data)) > 0.3) {
      return true;
    }

    // Strict mode with fileinfo available: the verdict comes from finfo's
    // MIME-encoding detection alone.
    if ($strict === true && \class_exists('finfo')) {
      $detected = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($data);

      return $detected === 'binary';
    }

    // Otherwise a single NUL byte is enough.
    return $nullBytes > 0;
  }
2246
2247
  /**
2248
   * Check if the file is binary.
2249
   *
2250
   * @param string $file
2251
   *
2252
   * @return boolean
2253
   */
2254
  public static function is_binary_file($file): bool
  {
    // Fallback used whenever the file cannot be opened or read.
    $block = '';

    try {
      $fp = \fopen($file, 'rb');

      // fopen() reports failure by returning false (plus a warning), not by
      // throwing, so the handle must be checked before it reaches fread().
      if ($fp !== false) {
        try {
          // Only the first 512 bytes are inspected.
          $chunk = \fread($fp, 512);
        } finally {
          // Release the handle even if fread() raised.
          \fclose($fp);
        }

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached e.g. when an error handler converts the fopen()/fread() warning into an exception.
      $block = '';
    }

    return self::is_binary($block, true);
  }
2266
2267
  /**
2268
   * Checks if the given string is equal to any "Byte Order Mark".
2269
   *
2270
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2271
   *
2272
   * @param string $str <p>The input string.</p>
2273
   *
2274
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2275
   */
2276
  public static function is_bom($str): bool
  {
    // Strict (===) comparison of $str against every key of self::$BOM.
    return \in_array($str, \array_keys(self::$BOM), true);
  }
2286
2287
  /**
2288
   * Check if the string contains any html-tags <lall>.
2289
   *
2290
   * @param string $str <p>The input string.</p>
2291
   *
2292
   * @return boolean
2293
   */
2294
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Searches for one opening, closing or self-closing tag, optionally carrying attributes.
    $found = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // A populated match array means at least one tag was seen.
    return $found !== [];
  }
2307
2308
  /**
2309
   * Try to check if "$str" is a JSON string.
2310
   *
2311
   * @param string $str <p>The input string.</p>
2312
   *
2313
   * @return bool
2314
   */
2315
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only a decoded object or array is accepted; any other decoded value is rejected.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    // ...and the decoder must not have recorded an error.
    return \json_last_error() === JSON_ERROR_NONE;
  }
2331
2332
  /**
2333
   * Check if the string is UTF-16.
2334
   *
2335
   * @param string $str <p>The input string.</p>
2336
   *
2337
   * @return int|false <p>
2338
   *                   <strong>false</strong> if it's not UTF-16,<br>
2339
   *                   <strong>1</strong> for UTF-16LE,<br>
2340
   *                   <strong>2</strong> for UTF-16BE.
2341
   *                   </p>
2342
   */
2343 View Code Duplication
  public static function is_utf16(string $str)
  {
    // Input that is_binary() does not flag is never reported as UTF-16.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // count_chars() result for the BOM-stripped input; fetched lazily and reused by the second pass.
    $rawChars = [];

    // One score per byte order, evaluated in the order LE, BE.
    $scoreLE = 0;
    $scoreBE = 0;

    foreach (['UTF-16LE', 'UTF-16BE'] as $byteOrder) {
      $utf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$utf8) {
        continue;
      }

      // Round trip UTF-8 -> UTF-16 -> UTF-8; only a stable result is scored.
      $backToUtf16 = \mb_convert_encoding($utf8, $byteOrder, 'UTF-8');
      $utf8Again = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($utf8Again !== $utf8) {
        continue;
      }

      if (\count($rawChars) === 0) {
        $rawChars = self::count_chars($str, true);
      }

      // NOTE(review): each key of the second count_chars() call is strictly compared
      // with the *values* of the first one; count_chars() is not visible here - confirm
      // that this is the intended comparison.
      $hits = 0;
      foreach (self::count_chars($utf8Again, true) as $char => $unused) {
        if (\in_array($char, $rawChars, true) === true) {
          $hits++;
        }
      }

      if ($byteOrder === 'UTF-16LE') {
        $scoreLE = $hits;
      } else {
        $scoreBE = $hits;
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($scoreLE === $scoreBE) {
      return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE
    return $scoreLE > $scoreBE ? 1 : 2;
  }
2398
2399
  /**
2400
   * Check if the string is UTF-32.
2401
   *
2402
   * @param string $str
2403
   *
2404
   * @return int|false <p>
2405
   *                   <strong>false</strong> if it's not UTF-32,<br>
2406
   *                   <strong>1</strong> for UTF-32LE,<br>
2407
   *                   <strong>2</strong> for UTF-32BE.
2408
   *                   </p>
2409
   */
2410 View Code Duplication
  public static function is_utf32(string $str)
  {
    // Input that is_binary() does not flag is never reported as UTF-32.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // count_chars() result for the BOM-stripped input; fetched lazily and reused by the second pass.
    $rawChars = [];

    // One score per byte order, evaluated in the order LE, BE.
    $scoreLE = 0;
    $scoreBE = 0;

    foreach (['UTF-32LE', 'UTF-32BE'] as $byteOrder) {
      $utf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$utf8) {
        continue;
      }

      // Round trip UTF-8 -> UTF-32 -> UTF-8; only a stable result is scored.
      $backToUtf32 = \mb_convert_encoding($utf8, $byteOrder, 'UTF-8');
      $utf8Again = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($utf8Again !== $utf8) {
        continue;
      }

      if (\count($rawChars) === 0) {
        $rawChars = self::count_chars($str, true);
      }

      // NOTE(review): each key of the second count_chars() call is strictly compared
      // with the *values* of the first one; count_chars() is not visible here - confirm
      // that this is the intended comparison.
      $hits = 0;
      foreach (self::count_chars($utf8Again, true) as $char => $unused) {
        if (\in_array($char, $rawChars, true) === true) {
          $hits++;
        }
      }

      if ($byteOrder === 'UTF-32LE') {
        $scoreLE = $hits;
      } else {
        $scoreBE = $hits;
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($scoreLE === $scoreBE) {
      return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE
    return $scoreLE > $scoreBE ? 1 : 2;
  }
2465
2466
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * An array is valid if every element is valid; an empty string is always valid.
   *
   * When PCRE supports the "/u" modifier, the check is delegated to PCRE (an invalid UTF-8 subject makes
   * the match fail). Otherwise the string is validated byte by byte.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The shortcut is only usable if PCRE really supports "/u"; the former "!== true" check
    // took this branch exactly when "/u" was NOT available, where preg_match() can never return 1.
    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback without PCRE-UTF-8 support: validate the octets by hand.

    $mState = 0; // expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // Unicode code point assembled so far
    $mBytes = 1; // expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;

          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
             * Check for illegal sequences and code points.
             */
            if (
                // From Unicode 3.1, non-shortest form is illegal
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }

            // re-initialize the state for the next character
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           * ((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A string that ends in the middle of a multi-octet sequence is not valid UTF-8.
    return $mState === 0;
  }
2624
2625
  /**
   * Decodes a JSON string.
   *
   * The input is passed through "UTF8::filter()" first, the result is handed to PHP's native json_decode().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>.</p>
   *
   * @return mixed <p>
   *               The value encoded in <i>json</i> in appropriate PHP type, exactly as returned by the native
   *               json_decode() (<b>NULL</b> if the <i>json</i> cannot be decoded or if the encoded data is
   *               deeper than the recursion limit).
   *               </p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    return \json_decode(self::filter($json), $assoc, $depth, $options);
  }
2669
2670
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" first, the result is handed to PHP's native json_encode().
   *
   * INFO: the method has no native return type on purpose: json_encode() returns <b>FALSE</b> on failure and
   *       a declared "string" return type would turn that documented result into a TypeError (strict types).
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $value = self::filter($value);

    return \json_encode($value, $options, $depth);
  }
2714
2715
  /**
   * Makes string's first char lowercase.
   *
   * The string is cut via "UTF8::substr()" into "everything after the first char" and "the first char";
   * only the first char is sent through "UTF8::strtolower()".
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    // "UTF8::substr()" may answer with false, which is treated as "no tail"
    return $head . ($tail === false ? '' : $tail);
  }
2739
2740
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerCased = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowerCased;
  }
2755
2756
  /**
   * Lowercase for all words in the string.
   *
   * The string is split via "UTF8::str_to_words()"; every non-empty part that is not listed in $exceptions
   * gets its first char lower-cased via "UTF8::lcfirst()", then the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // check for "empty" via the first byte: a plain "!$str" would also reject the string "0"
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only, a part like "0" must survive
      if (!isset($word[0])) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
2805
2806
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without $chars, leading chars of the Unicode categories "Z" (separators) and "C" (other, e.g. control
   * chars) are removed. With $chars, the character class is built by "UTF8::rxClass()".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP, but it is a perfectly valid char-list (e.g. to strip leading zeros),
    // so it must not be mistaken for "no chars given".
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      return \preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return \preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
2827
2828
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array is joined into one string first; the code points come from "UTF8::codepoints()" and the
   * highest one is converted back via "UTF8::chr()".
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,<br>
   *                an empty string if there are no code points at all (e.g. empty input).
   *                </p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // \max() must not be called with an empty array (warning + false / ValueError, depending on the PHP version)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\max($codePoints));
  }
2843
2844
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The per-char sizes are taken from "UTF8::chr_size_list()".
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte lengths of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2861
2862
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal mbstring encoding is switched to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2877
2878
  /**
   * Checks whether the "mbstring.func_overload" ini-setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> if the constant MB_OVERLOAD_STRING is not defined or the bit is not set.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    // the constant must exist before it may be referenced below
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2884
2885
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array is joined into one string first; the code points come from "UTF8::codepoints()" and the
   * lowest one is converted back via "UTF8::chr()".
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string <p>
   *                The character with the lowest code point,<br>
   *                an empty string if there are no code points at all (e.g. empty input).
   *                </p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // \min() must not be called with an empty array (warning + false / ValueError, depending on the PHP version)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\min($codePoints));
  }
2900
2901
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2917
2918
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input (fallback), "UTF-8" / "UTF8" (fast path), per-request cache, the list of known
   * encodings ("encodings" data-file, returned as-is), and finally the alias table below. For the alias
   * lookup the name is upper-cased and everything except letters and digits is removed, so that
   * e.g. "iso-8859-1", "ISO_8859_1" and "iso 8859 1" all end up as "ISO-8859-1". Names without an alias are
   * returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Whitespace is removed as well: no key of the alias table contains whitespace, so a name that
    // keeps its whitespace could never be matched.
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache by the name the caller used, so the next call with the same spelling is answered directly
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3035
3036
  /**
   * Normalize some MS Word special characters.
   *
   * The keys of the "utf8_msword" data table are replaced by their values; the table is loaded on
   * first use and its keys / values are kept in static caches afterwards.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    static $searchCache = null;
    static $replaceCache = null;

    if (!isset($str[0])) {
      return '';
    }

    if ($searchCache === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $table = self::$UTF8_MSWORD;
      $searchCache = \array_keys($table);
      $replaceCache = \array_values($table);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
3064
3065
  /**
   * Normalize the whitespace.
   *
   * Every entry of the internal whitespace table is replaced by a plain space; unless requested otherwise,
   * the entries of the internal bidi-control table are removed before that.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one search-list per "$keepNonBreakingSpace"-mode (0 / 1)
    static $whitespaceByMode = [];
    static $bidiControls = null;

    if (!isset($str[0])) {
      return '';
    }

    $mode = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceByMode[$mode])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceByMode[$mode] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($whitespaceByMode[$mode], ' ', $str);
  }
3107
3108
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per "original input + (normalized) encoding". If "IntlChar" is flagged as supported
   * and yields a non-zero code point, that one is used; otherwise the code point is computed from the
   * first (up to four) bytes of the character.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $codePointCache = [];

    // the cache-key is built from the input as it was passed in, not from the converted char
    $input = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $key = $input . $encoding;
    if (isset($codePointCache[$key]) === true) {
      return $codePointCache[$key];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      if ($viaIntl) {
        return $codePointCache[$key] = $viaIntl;
      }
    }

    // 1-based list of the first (max. 4) byte values
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing usable): the lead byte itself
      $codePoint = $lead;
    }

    return $codePointCache[$key] = $codePoint;
  }
3173
3174
  /**
   * Parses the string into an array (into the the second parameter).
   *
   * WARNING: Instead of "parse_str()" this method do not (re-)placing variables in the current scope,
   *          if the second parameter is not set!
   *
   * The parsing itself is done by mb_parse_str().
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success means: the parser did not fail AND something ended up in $result
    return $parsed !== false && !empty($result);
  }
3199
3200
  /**
   * Checks if \u modifier is available that enables Unicode support in PCRE.
   *
   * An empty pattern with the "u" modifier is matched against an empty string (errors are silenced);
   * the result is cast to bool.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3210
3211
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is turned into a code point: decimal digits are used as the number itself, other
   * hexadecimal-looking input goes through "UTF8::hex_to_int()", everything else through "UTF8::ord()".
   * An empty array is returned if a boundary is empty or resolves to code point 0.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // shared conversion "boundary => code point" for both ends of the range
    $toCodePoint = static function ($boundary): int {
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $first = $toCodePoint($var1);
    if (!$first) {
      return [];
    }

    $last = $toCodePoint($var2);
    if (!$last) {
      return [];
    }

    $codePoints = \range($first, $last);

    return \array_map([self::class, 'chr'], $codePoints);
  }
3257
3258
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten to hexadecimal HTML entities,
    // so that the entity decoding below can resolve them.
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapeRx, $str)) {
      $str = (string)\preg_replace($unicodeEscapeRx, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $before = $str;

      // one decoding pass: to UTF-8 -> HTML entities -> percent-encoding -> simple UTF-8 fix
      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

      // stop after one pass, or as soon as a pass no longer changes anything
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return $str;
  }
3306
3307
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3322
3323
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$BOM" (BOM string => length) is checked against the start of the
   * string; a leading BOM is cut off by its registered length.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at the very beginning of the string is of interest
      if (self::strpos($str, $bomString, 0, 'CP850') !== 0) {
        continue;
      }

      // NOTE(review): "CP850" presumably forces byte-wise handling in strpos() / substr() - confirm.
      $remainder = self::substr($str, $bomByteLength, null, 'CP850');

      $str = $remainder === false ? '' : (string)$remainder;
    }

    return $str;
  }
3348
3349
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated "$what" (e.g. "   ") is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // an empty needle cannot be duplicated
        if ($item === '') {
          continue;
        }

        // A callback is used instead of a replacement string, because "$0", "$1" or "\1"
        // inside of $item would otherwise be interpreted as back-references by preg_replace().
        $str = (string)\preg_replace_callback(
            '/(' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3372
3373
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the percent-encoded form of the characters (e.g. "%00", "%0B").</p>
   * @param string $replacement <p>The string that is inserted instead of the removed characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The hex digits of a percent-encoded byte may be written in upper or lower case,
      // therefore the patterns need to be case-insensitive.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced anymore: a removal can join the surrounding
    // characters into a new match (e.g. "%0%000" -> "%00").
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3406
3407
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Also replace invalid UTF-8 byte sequences.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" only accepts "none", "long", "entity" or an integer code point,
      // so the replacement string must not be passed to it directly. Invalid sequences are turned
      // into U+FFFD here, and every U+FFFD is swapped for the real replacement below.
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the diamond question mark)
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
3456
3457
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without "$chars" every trailing separator (\pZ) and "other" (\pC, e.g. control) character
   * is removed. If the string can't be processed by the regex engine (e.g. invalid UTF-8),
   * it is returned unchanged.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is a valid character to strip, so it must not be handled like "no chars given".
    if ($chars === INF || (!$chars && !\is_numeric($chars))) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      $pattern = '/' . self::rxClass((string)$chars) . '+$/u';
    }

    // preg_replace() returns null on error, which would violate the "string" return type
    return \preg_replace($pattern, '', $str) ?? $str;
  }
3478
3479
  /**
   * Build a regex fragment that matches any one of the characters of the given string.
   *
   * Short characters are collected in one character class ("[...]"); elements for which
   * "UTF8::strlen()" does not return 1 are added as separate alternatives ("(?:[...]|x|y)").
   * The result is cached per "$s . $class" for the lifetime of the process.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>The regex fragment (without delimiters).</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $cache = [];

    $key = $s . $class;
    if (isset($cache[$key])) {
      return $cache[$key];
    }

    $charClass = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $unit) {
      if ('-' === $unit) {
        // a leading "-" is a literal inside a character class
        $charClass = '-' . $charClass;
      } elseif (!isset($unit[2])) {
        // one or two bytes: may be a regex meta character -> quote it
        $charClass .= \preg_quote($unit, '/');
      } elseif (1 === self::strlen($unit)) {
        // a single character of three or more bytes
        $charClass .= $unit;
      } else {
        // more than one character: can't be part of a character class
        $alternatives[] = $unit;
      }
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if ($alternatives === []) {
      $result = $charClass;
    } else {
      $result = '(?:' . \implode('|', \array_merge([$charClass], $alternatives)) . ')';
    }

    $cache[$key] = $result;

    return $result;
  }
3527
3528
  /**
   * WARNING: Prints the detected native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of "self::$SUPPORT" is echoed as "key - value" inside of a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $lines = '';
    foreach (self::$SUPPORT as $key => $value) {
      $lines .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }

    echo '<pre>' . $lines . '</pre>';
  }
3543 1
3544
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // ASCII input is passed through untouched, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are used as-is, every other name is normalized first
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3573
3574
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via regex; otherwise a byte-wise fallback
   * decodes the UTF-8 sequences by hand and silently drops bytes that don't form a valid
   * 1-4 byte sequence.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: decode the UTF-8 sequences byte by byte

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // 0xxxxxxx: single byte (ASCII)
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;

          continue;
        }

        // the lead byte announces the length of the sequence
        if (($lead & "\xE0") === "\xC0") {
          $seqLen = 2; // 110xxxxx
        } elseif (($lead & "\xF0") === "\xE0") {
          $seqLen = 3; // 1110xxxx
        } elseif (($lead & "\xF8") === "\xF0") {
          $seqLen = 4; // 11110xxx
        } else {
          continue; // stray continuation byte or invalid lead byte
        }

        // truncated sequence at the end of the string
        if (!isset($str[$pos + $seqLen - 1])) {
          continue;
        }

        // every following byte must be a continuation byte (10xxxxxx)
        $sequence = $lead;
        $valid = true;
        for ($offset = 1; $offset < $seqLen; $offset++) {
          $next = $str[$pos + $offset];
          if (($next & "\xC0") !== "\x80") {
            $valid = false;
            break;
          }
          $sequence .= $next;
        }

        if ($valid) {
          $chars[] = $sequence;
          $pos += $seqLen - 1;
        }
      }
    }

    if ($length > 1) {
      // join the single characters into chunks of "$length" characters
      return \array_map(
          function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3691
3692
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally a round-trip through "iconv()" for every known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the loaded data is not guaranteed to be a list of encodings
    if (\is_array(self::$ENCODINGS) === false) {
      return false;
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // "iconv()" returns false on failure, so the result is compared directly:
      // hashing it via "md5()" would throw a TypeError for false while strict types are enabled.
      if ($converted === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
3798
3799
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is byte-wise and case-sensitive; an empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the last bytes of the haystack, as many as the needle has
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3819
3820
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // NOTE(review): the tail is cut by the byte length of the needle, so a match whose case
    // variants differ in byte length is not found - confirm whether that is intended.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3840
3841
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value(s) being searched for.
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); they are inserted literally,
   *                       "$1" or "\1" are not handled as back-references.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive UTF-8 pattern per needle
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // "\" and "$" are special in a preg_replace() replacement string,
    // they are escaped so that the replacement is inserted literally.
    $escape = static function ($value) {
      return \addcslashes((string)$value, '\\$');
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $subject = \preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
3884
3885
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the needle has to be found at position 0
    return self::stripos($haystack, $needle) === 0;
  }
3905
3906
  /**
   * Limit the number of characters in a string, without cutting a word in the middle.
   *
   * A string that is longer than "$length" is cut at the last space within the limit
   * and "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit is exactly at the end of a word
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit and drop the last (maybe incomplete) word
    $truncated = (string)self::substr($str, 0, $length);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $completeWords = \implode(' ', $words);

    if ($completeWords !== '') {
      return $completeWords . $strAddOn;
    }

    // there is no space within the limit: cut the word itself
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
3942
3943
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The string is returned unchanged if "$pad_length" is not positive, is smaller than the
   * length of the string, or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = (int)self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    // an empty pad string can't fill anything (and would cause a division by zero below)
    $ps_length = (int)self::strlen($pad_string);
    if ($ps_length === 0) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    // integer lengths only: a float like "$diff / 2" is not a valid length while strict types are enabled
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $preLength = $diff;
        $postLength = 0;
        break;

      case STR_PAD_BOTH:
        // with an odd difference, the right side gets the extra character
        $preLength = \intdiv($diff, 2);
        $postLength = $diff - $preLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $preLength = 0;
        $postLength = $diff;
    }

    // repeat the pad string often enough and cut it to the exact number of characters
    $makePadding = static function (int $length) use ($pad_string, $ps_length): string {
      if ($length === 0) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($length / $ps_length));

      return (string)self::substr($repeated, 0, $length);
    };

    return $makePadding($preLength) . $str . $makePadding($postLength);
  }
3997
3998
  /**
   * Repeat a string.
   *
   * The string is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
4022
4023
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // plain delegation: the native function receives the arguments unchanged
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4056
4057
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The string to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first occurrence replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstPos = self::strpos($subject, $search);

    // nothing found: nothing to replace
    if ($firstPos === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstPos, $searchLength);
  }
4076
4077
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "UTF8::split()", so multi-byte characters stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    $chars = self::split($str);

    // "shuffle()" works in place
    \shuffle($chars);

    $shuffled = \implode('', $chars);

    return $shuffled;
  }
4092
4093
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicate values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
4118
4119
  /**
   * Split a string into an array of chunks, each holding "$len" grapheme clusters.
   *
   * If an array is given, every entry is split on its own and the array of results is returned.
   *
   * @param string|string[] $str <p>The input string, or an array of input strings.</p>
   * @param int             $len <p>Number of grapheme clusters per chunk.</p>
   *
   * @return array
   */
  public static function str_split($str, int $len = 1): array
  {
    // arrays are processed entry by entry (recursively)
    if (\is_array($str) === true) {
      $splitEntries = $str;
      foreach ($str as $key => $value) {
        $splitEntries[$key] = self::str_split($value, $len);
      }

      return $splitEntries;
    }

    if (!isset($str[0])) {
      return [];
    }

    // lengths below 1 are handed to the native function unchanged
    if ($len < 1) {
      return \str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue "$len" consecutive clusters together into one chunk
    $chunks = [];
    foreach ($clusters as $index => $cluster) {
      if ($index % $len === 0) {
        $chunks[] = $cluster;
      } else {
        $chunks[\count($chunks) - 1] .= $cluster;
      }
    }

    return $chunks;
  }
4167
4168
  /**
   * Check whether "$haystack" begins with "$needle".
   *
   * An empty haystack or an empty needle always yields <strong>false</strong>.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    $bothNonEmpty = isset($haystack[0], $needle[0]);
    if (!$bothNonEmpty) {
      return false;
    }

    // the needle is a prefix exactly when its first occurrence is at position 0
    return \strpos($haystack, $needle) === 0;
  }
4188
4189
  /**
   * Get a binary representation ("0" / "1" digits) of a specific string.
   *
   * The bytes of the string are read as one big-endian number; leading zero
   * bits are dropped, and "0" is returned if no bit is set (or the string is empty).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary(string $str): string
  {
    if ($str === '') {
      return '0';
    }

    $bytes = \unpack('C*', $str);
    if ($bytes === false) {
      return '0';
    }

    // Convert byte by byte: pushing the whole hex string through "base_convert()"
    // goes via a float and silently loses precision for longer inputs.
    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= \sprintf('%08b', $byte);
    }

    // keep the old output format: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4202
4203
  /**
   * Convert a string into an array of words and the text between them.
   *
   * The string is split with the word pattern as a captured delimiter, so the
   * result alternates between non-word parts and words.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove values that are empty after trimming.</p>
   * @param null|int $removeShortValues <p>If set, remove values whose length is less than or equal to this number.</p>
   *
   * @return array
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // without any filter the raw split result is returned as it is
    $filterShort = $removeShortValues !== null;
    if (!$filterShort && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = [];
    foreach ($parts as $part) {
      $tooShort = $filterShort && self::strlen($part) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $blank = $removeEmptyValues === true && \trim($part) === '';
      if ($blank) {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4258
4259
  /**
   * Alias of "UTF8::to_ascii()": all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4274
4275
  /**
   * Count the words in a UTF-8 string, or list them.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return the number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with the word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words, or the words themselves, depending on $format.</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // "str_to_words()" alternates separators and words; words sit on the odd indexes
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format !== 1 && $format !== 2) {
      return ($partCount - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // $format === 2: key every word by its character offset in the input
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
4318
4319
  /**
   * Case-insensitive string comparison.
   *
   * INFO: both strings are case-folded and then compared via UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4337
4338
  /**
   * Alias of "UTF8::strstr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
4355
4356
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings compare as equal right away; otherwise both strings are
   * normalized to NFD before they are compared byte-wise.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "Normalizer::normalize()" does not return a string on failure (e.g. invalid UTF-8);
    // compare the raw input in that case instead of passing a non-string to "\strcmp()".
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
4376
4377
  /**
   * Find the length of the initial segment that contains none of the chars from "$charList".
   *
   * @param string $str
   * @param string $charList <p>The chars that end the segment.</p>
   * @param int    $offset   <p>If given (together with / or $length), only that part of $str is examined.</p>
   * @param int    $length
   *
   * @return int|null <p><strong>null</strong> if $charList is empty, or if the (sub-)string is empty or
   *                  could not be extracted.</p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // narrow the input down first, if a window was requested
    $windowRequested = $offset || $length !== null;
    if ($windowRequested) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }

      $str = (string)$window;
    }

    if (!isset($str[0])) {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[1]);
    }

    // none of the chars occurs: the whole string is the segment
    return self::strlen($str);
  }
4411
4412
  /**
   * Alias of "UTF8::stristr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
4429
4430
  /**
   * Build a UTF-8 string out of a list of code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    // every code point is converted via "UTF8::chr()", the results are concatenated
    $toChar = [
        self::class,
        'chr',
    ];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
4452
4453
  /**
   * Check whether the string begins with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    // the BOM sequences are the keys of the lookup table, the byte lengths are not needed here
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4470
4471
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>The input string.</p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped; handed to the native
   *                                "strip_tags()" unchanged.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4503
4504
  /**
   * Remove every whitespace character from the string.
   *
   * Everything matched by the "[[:space:]]" class (in unicode mode) is removed.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
4521
4522
  /**
   * Find the position of the first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $bothNonEmpty = isset($haystack[0], $needle[0]);
    if (!$bothNonEmpty) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if (!$useGrapheme) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
4570
4571
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if cleaning removed all of it.
    // Compare against '' explicitly: a plain truthiness check would also
    // treat the valid needle "0" as empty.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4646
4647
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "ASCII" and "CP850" are handled as 8-BIT
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if (
          $encoding === 'CP850'
          &&
          self::$SUPPORT['mbstring_func_overload'] === false
      ) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // other encodings without mbstring: warn if there is no iconv either, otherwise try iconv
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $parts);
    $length = \count($parts[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4752
4753
  /**
   * Get string length in byte.
   *
   * @param string $str
   *
   * @return int
   */
  public static function strlen_in_byte(string $str): int
  {
    // make sure the support-flags are initialized before they are read,
    // like the other methods of this class do
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // with "mbstring.func_overload" the native "strlen()" no longer counts bytes,
    // so the 8-BIT encoding is forced via "mb_strlen()"
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
4770
4771
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: both strings are case-folded and then compared via UTF8::strnatcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4787
4788
  /**
   * String comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return \strnatcmp($folded1, $folded2);
  }
4806
4807
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are case-folded and then compared via UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4824
4825
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are cut to "$len" characters and then compared via UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
4845
4846
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found
   *                      (or if haystack / char_list is empty).</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    $bothNonEmpty = isset($haystack[0], $char_list[0]);
    if (!$bothNonEmpty) {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $cutAt = \strpos($haystack, $found[0]);

    return \substr($haystack, $cutAt);
  }
4868
4869
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // NOTE: $needle is declared as string, so there is no integer needle to convert here.

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Absolute character position at which the search starts. A negative offset
    // counts from the end of the haystack, so it has to be converted before the
    // haystack is cut; otherwise the result would be relative to the cut-off tail.
    $start = $offset;
    if ($start < 0) {
      $start += $haystackIsAscii ? \strlen($haystack) : self::strlen($haystack);
      if ($start < 0) {
        $start = 0;
      }
    }

    if ($haystackIsAscii) {
      $tail = \substr($haystack, $offset);
    } else {
      $tail = self::substr($haystack, $offset);
    }
    if ($tail === false) {
      $tail = '';
    }
    $tail = (string)$tail;

    $pos = \strpos($tail, $needle);
    if ($pos === false) {
      return false;
    }

    // "$pos" comes from the native "strpos()"; measure the part in front of the
    // match in characters and add the start position
    return $start + self::strlen(\substr($tail, 0, $pos));
  }
5008
5009
  /**
   * Returns the part of a string around the last occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return everything from the start of haystack
   *                              up to the last occurrence of needle.<br>
   *                              <b>false</b>: return everything from the last occurrence
   *                              of needle to the end of haystack.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name; anything other than "UTF-8" or "CP850"
   *                              is passed through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from both strings before searching.</p>
   *
   * @return string|false The requested portion of haystack, or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // the two most common encodings are used as-is, everything else gets normalized
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // invalid byte sequences would distort the search result, so strip them on request
    if ($cleanUtf8) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the actual search is delegated to the "mb_"-function (native or provided by a polyfill)
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5048
5049
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is broken into pieces by UTF8::split() and those pieces
   * are joined again in reverse order.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The reversed string, or an empty string for empty input.</p>
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = \array_reverse($chars);

    return \implode('', $reversed);
  }
5064
5065
  /**
   * Case-insensitive variant of strrchr(): returns the part of a string
   * around the last occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string to search in.</p>
   * @param string $needle         <p>The string to search for.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return everything from the start of haystack
   *                               up to the last occurrence of needle.<br>
   *                               <b>false</b>: return everything from the last occurrence
   *                               of needle to the end of haystack.
   *                               </p>
   * @param string $encoding       [optional] <p>
   *                               Character encoding name; anything other than "UTF-8" or "CP850"
   *                               is passed through UTF8::normalize_encoding() first.
   *                               </p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from both strings before searching.</p>
   *
   * @return string|false <p>The requested portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // the two most common encodings are used as-is, everything else gets normalized
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // invalid byte sequences would distort the search result, so strip them on request
    if ($cleanUtf8) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5103
5104
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Lookup order: "mbstring" if available, then "intl" (UTF-8 only),
   * then a vanilla php fallback that upper-cases both strings and delegates to UTF8::strrpos().
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or one of the strings is empty), it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so the former "code point as int" conversion
    //       could never be reached here and was removed

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only warn here: the lookup below still continues with the remaining fallbacks
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php
    //
    // NOTE(review): upper-casing may change the number of characters for some inputs,
    //               in which case the returned position refers to the upper-cased haystack - TODO confirm

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5166
5167
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Lookup order: "mbstring" if available, then "intl" (UTF-8 only),
   * then a vanilla php fallback based on \strrpos() + UTF8::strlen().
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> (the default) is handled like <b>0</b>.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or one of the strings is empty), it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as code point and converted into its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // "\mb_strrpos()" and "\grapheme_strrpos()" declare the offset as integer, so the
    // "null" default must not be forwarded as-is (TypeError with "strict_types")
    $offset = $offset ?? 0;

    // INFO: the former "$encoding === true" fallback was removed, it could never match
    //       because $encoding is declared as "string"
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only warn here: the lookup below still continues with the remaining fallbacks
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters, the position is corrected again at the end
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut off the last characters, so nothing needs to be added to the position
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match ...
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position by measuring the text in front of the match
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5257
5258
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * If an offset and / or a length is given, only that part of the string
   * (extracted via UTF8::substr()) is examined.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the examined part.</p>
   * @param int    $length [optional] <p>Length of the examined part.</p>
   *
   * @return int <p>Length of the leading segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    $hasWindow = $offset !== 0 || $length !== null;
    if ($hasWindow) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the longest run of mask characters at the very beginning of the string
    $matches = [];
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!\preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5285
5286
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Lookup order: "mbstring" if available, then "intl" (UTF-8 only),
   * then a regular expression based fallback.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or one of the strings is empty).
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would distort the result
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    // only warn here: the lookup below still continues with the remaining fallbacks
    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    $matches = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $matches);

    if (!isset($matches[1])) {
      return false;
    }

    $prefix = $matches[1];
    if ($before_needle) {
      return $prefix;
    }

    // skip the prefix, the rest starts with the needle
    return self::substr($haystack, self::strlen($prefix));
  }
5353
5354
  /**
   * Unicode transformation for case-less matching.
   *
   * The replacements from UTF8::$COMMON_CASE_FOLD are applied first, optionally followed by the
   * "caseFolding_full" data set; the result is finally passed through UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // per-request caches, filled on first use
    static $COMMON_FOLD_SEARCH = null;
    static $COMMON_FOLD_REPLACE = null;
    static $FULL_FOLD_TABLE = null;

    if ($COMMON_FOLD_SEARCH === null) {
      $COMMON_FOLD_SEARCH = \array_keys(self::$COMMON_CASE_FOLD);
      $COMMON_FOLD_REPLACE = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($COMMON_FOLD_SEARCH, $COMMON_FOLD_REPLACE, $str);

    if ($full) {
      if ($FULL_FOLD_TABLE === null) {
        $FULL_FOLD_TABLE = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $str = (string)\str_replace($FULL_FOLD_TABLE[0], $FULL_FOLD_TABLE[1], $str);
    }

    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5400
5401
  /**
   * Make a string lowercase.
   *
   * Without a language the conversion is done by "\mb_strtolower()". With a language the
   * "intl" transliterator "<lang>-Lower" is used (falling back to "Any-Lower" with a warning
   * if that id is not available); without "intl" a warning is triggered and "\mb_strtolower()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8) {
      // strip invalid characters before the conversion
      $str = self::clean($str);
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    $isKnownId = \in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true);
    if (!$isKnownId) {
      \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // language independent transliterator as replacement
      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
5453
5454
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks ("\p{Mn}") is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5466
5467
  /**
   * Make a string uppercase.
   *
   * Without a language the conversion is done by "\mb_strtoupper()". With a language the
   * "intl" transliterator "<lang>-Upper" is used (falling back to "Any-Upper" with a warning
   * if that id is not available); without "intl" a warning is triggered and "\mb_strtoupper()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid characters before the conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // language independent transliterator as replacement
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // INFO: this message named "UTF8::strtolower()" before (copy & paste error)
      \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5518
5519
  /**
   * Translate characters or replace sub-strings.
   *
   * With $to given, $from and $to are split via UTF8::str_split(), trimmed to the same number of
   * entries and used as a "from => to" map. Without $to, a string $from is removed from $str and any
   * other $from is handed to "\strtr()" as replacement map.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to. The default (INF) means "not given".</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to translate if source and target are identical
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // surplus entries on either side are ignored
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    // a plain string without a target is simply removed from the input
    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
5564
5565
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>
   *                          Default is UTF-8; anything other than "UTF-8" or "CP850"
   *                          is passed through UTF8::normalize_encoding() first.
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The width as reported by "\mb_strwidth()".</p>
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // the two most common encodings are used as-is, everything else gets normalized
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // the measuring is delegated to the "mb_"-function (native or provided by a polyfill)
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
5589
5590
  /**
5591
   * Get part of a string.
5592
   *
5593
   * @link http://php.net/manual/en/function.mb-substr.php
5594
   *
5595
   * @param string $str       <p>The string being checked.</p>
5596
   * @param int    $offset    <p>The first position used in str.</p>
5597
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
5598
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5599
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5600
   *
5601
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
5602
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
5603
   *                      characters long, <b>FALSE</b> will be returned.</p>
5604
   */
5605
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Multi-byte aware substr(): returns the part of $str described by
    // $offset / $length (counted in characters of $encoding), or FALSE when
    // $offset lies behind the end of the string.

    // An empty input or an explicit zero length can only produce an empty string.
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string requested.
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // The character count is only needed for offset checks or an open-ended length.
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Offset points exactly behind the last character and no length was given.
    // ($length can not be 0 here, so the strict NULL check is the precise test.)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Offset behind the end of the string: impossible, signal failure.
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    // From here on $length is always an integer.
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut byte-wise with the native function, as long as
    // "mbstring.func_overload" does not replace it.
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the encoding explicitly instead of relying on "iconv.internal_encoding"
      $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // Pure ASCII: bytes and characters are the same thing.
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5709
5710
  /**
5711
   * Binary safe comparison of two strings from an offset, up to length characters.
5712
   *
5713
   * @param string   $str1               <p>The main string being compared.</p>
5714
   * @param string   $str2               <p>The secondary string being compared.</p>
5715
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
5716
   *                                     counting from the end of the string.</p>
5717
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
5718
   *                                     the length of the str compared to the length of main_str less the offset.</p>
5719
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5720
   *                                     insensitive.</p>
5721
   *
5722
   * @return int <p>
5723
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
5724
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
5725
   *             <strong>0</strong> if they are equal.
5726
   *             </p>
5727
   */
5728
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    // Without offset and length both strings are compared as they are.
    $needsSlicing = ($offset !== 0 || $length !== null);

    if ($needsSlicing) {
      // cut the requested window out of the main string ("false" is treated as empty)
      $window = self::substr($str1, $offset, $length);
      $str1 = ($window === false) ? '' : (string)$window;

      // compare against an equally long prefix of the secondary string
      $prefix = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($prefix === false) ? '' : (string)$prefix;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5754
5755
  /**
5756
   * Count the number of substring occurrences.
5757
   *
5758
   * @link  http://php.net/manual/en/function.substr-count.php
5759
   *
5760
   * @param string $haystack   <p>The string to search in.</p>
5761
   * @param string $needle     <p>The substring to search for.</p>
5762
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
5763
   * @param int    $length     [optional] <p>
5764
   *                           The maximum length after the specified offset to search for the
5765
   *                           substring. It outputs a warning if the offset plus the length is
5766
   *                           greater than the haystack length.
5767
   *                           </p>
5768
   * @param string $encoding   <p>Set the charset.</p>
5769
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
5770
   *
5771
   * @return int|false <p>This function returns an integer, or false if the haystack or the needle is empty.</p>
5772
   */
5773
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Counts how often $needle occurs in $haystack (optionally only inside the
    // window given by $offset / $length); FALSE if one of the strings is empty.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // open-ended window: measure the haystack in the requested encoding
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested window ("false" is treated as empty)
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: the number of full matches is the number of occurrences
    // ("false" on a PCRE error is reported as 0)
    return (int)\preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);
  }
5837
5838
  /**
5839
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
5840
   *
5841
   * @param string $haystack <p>The string to search in.</p>
5842
   * @param string $needle   <p>The substring to search for.</p>
5843
   *
5844
   * @return string <p>Return the sub-string.</p>
5845
   */
5846 View Code Duplication
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // an empty needle or a non-matching prefix leaves the haystack untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many leading characters as the needle is long
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5866
5867
  /**
5868
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
5869
   *
5870
   * @param string $haystack <p>The string to search in.</p>
5871
   * @param string $needle   <p>The substring to search for.</p>
5872
   *
5873
   * @return string <p>Return the sub-string.</p>
5874
   */
5875 View Code Duplication
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // an empty needle or a non-matching suffix leaves the haystack untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the trailing needle
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
5895
5896
  /**
5897
   * Removes a prefix ($needle) from the start of the string ($haystack).
5898
   *
5899
   * @param string $haystack <p>The string to search in.</p>
5900
   * @param string $needle   <p>The substring to search for.</p>
5901
   *
5902
   * @return string <p>Return the sub-string.</p>
5903
   */
5904 View Code Duplication
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // an empty needle or a non-matching prefix leaves the haystack untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many leading characters as the needle is long
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5924
5925
  /**
5926
   * Replace text within a portion of a string.
5927
   *
5928
   * source: https://gist.github.com/stemar/8287074
5929
   *
5930
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
5931
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
5932
   * @param int|int[]       $offset           <p>
5933
   *                                          If start is positive, the replacing will begin at the start'th offset
5934
   *                                          into string.
5935
   *                                          <br><br>
5936
   *                                          If start is negative, the replacing will begin at the start'th character
5937
   *                                          from the end of string.
5938
   *                                          </p>
5939
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
5940
   *                                          portion of string which is to be replaced. If it is negative, it
5941
   *                                          represents the number of characters from the end of string at which to
5942
   *                                          stop replacing. If it is not given, then it will default to strlen(
5943
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
5944
   *                                          length is zero then this function will have the effect of inserting
5945
   *                                          replacement into string at the given start offset.</p>
5946
   *
5947
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
5948
   */
5949
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // Array input: bring all arguments to one entry per input string and
    // handle every string separately via a recursive call.
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: everything that is not an integer is replaced by 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: NULL entries become 0, other non-integers become the number of strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if ($value === null) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // Single string with a list of replacements: only the first entry is used.
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // 7-bit input can be handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the character lists
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
6026
6027
  /**
6028
   * Removes a suffix ($needle) from the end of the string ($haystack).
6029
   *
6030
   * @param string $haystack <p>The string to search in.</p>
6031
   * @param string $needle   <p>The substring to search for.</p>
6032
   *
6033
   * @return string <p>Return the sub-string.</p>
6034
   */
6035 View Code Duplication
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // an empty needle or a non-matching suffix leaves the haystack untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the trailing needle
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6055
6056
  /**
6057
   * Returns a case swapped version of the string.
6058
   *
6059
   * @param string $str       <p>The input string.</p>
6060
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
6061
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6062
   *
6063
   * @return string <p>Each character's case swapped.</p>
6064
   */
6065
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Swaps the case of every non-whitespace character: a character that is
    // already upper case gets lowered, everything else gets uppered.
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the regex below runs with the "u"-modifier and fails on malformed UTF-8,
      // so invalid bytes are removed up front
      $str = self::clean($str);
    }

    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          // unchanged by strtoupper => already upper case (or caseless) => lower it
          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns NULL on a PCRE error (e.g. malformed UTF-8
    // without $cleanUtf8); hand back the input unchanged instead of violating the
    // declared string return type.
    return $strSwappedCase ?? $str;
  }
6097
6098
  /**
6099
   * alias for "UTF8::to_ascii()"
6100
   *
6101
   * @see        UTF8::to_ascii()
6102
   *
6103
   * @param string $str
6104
   * @param string $subst_chr
6105
   * @param bool   $strict
6106
   *
6107
   * @return string
6108
   *
6109
   * @deprecated <p>use "UTF8::to_ascii()"</p>
6110
   */
6111
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    // deprecated alias: all arguments are forwarded unchanged to "to_ascii()"
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6115
6116
  /**
6117
   * alias for "UTF8::to_iso8859()"
6118
   *
6119
   * @see        UTF8::to_iso8859()
6120
   *
6121
   * @param string|string[] $str
6122
   *
6123
   * @return string|string[]
6124
   *
6125
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
6126
   */
6127
  public static function toIso8859($str)
  {
    // deprecated alias: the argument is forwarded unchanged to "to_iso8859()"
    $converted = self::to_iso8859($str);

    return $converted;
  }
6131
6132
  /**
6133
   * alias for "UTF8::to_latin1()"
6134
   *
6135
   * @see        UTF8::to_latin1()
6136
   *
6137
   * @param string|string[] $str
6138
   *
6139
   * @return string|string[]
6140
   *
6141
   * @deprecated <p>use "UTF8::to_latin1()"</p>
6142
   */
6143
  public static function toLatin1($str)
  {
    // deprecated alias: the argument is forwarded unchanged to "to_latin1()"
    $converted = self::to_latin1($str);

    return $converted;
  }
6147
6148
  /**
6149
   * alias for "UTF8::to_utf8()"
6150
   *
6151
   * @see        UTF8::to_utf8()
6152
   *
6153
   * @param string|string[] $str
6154
   *
6155
   * @return string|string[]
6156
   *
6157
   * @deprecated <p>use "UTF8::to_utf8()"</p>
6158
   */
6159
  public static function toUTF8($str)
  {
    // deprecated alias: the argument is forwarded unchanged to "to_utf8()"
    $converted = self::to_utf8($str);

    return $converted;
  }
6163
6164
  /**
6165
   * Convert a string into ASCII.
6166
   *
6167
   * @param string $str     <p>The input string.</p>
6168
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
6169
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6170
   *                        performance</p>
6171
   *
6172
   * @return string
6173
   */
6174
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Transliterates $str to ASCII. Characters without a known replacement are
    // substituted by $unknown. With $strict the intl transliterator is tried
    // first, before the bundled lookup tables are used.

    // lookup tables per "bank" (code point >> 8), loaded on demand via getData()
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // HACK for issue from "transliterator_transliterate()"
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        // the transliterator reports failure with "false": keep the current
        // string in that case and continue with the table based fallback
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // start every character without a code point, so that a lead byte which
      // matches none of the branches below can not reuse the value of the
      // previous character
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF or no decodable code point: use the placeholder
      if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the lookup table, the lowest 8 bits the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return \implode('', $chars);
  }
6329
6330
  /**
6331
   * Convert a string into "ISO-8859"-encoding (Latin-1).
6332
   *
6333
   * @param string|string[] $str
6334
   *
6335
   * @return string|string[]
6336
   */
6337
  public static function to_iso8859($str)
  {
    // arrays are converted entry by entry (recursively), the keys are kept
    if (\is_array($str) === true) {
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    return isset($str[0]) ? self::utf8_decode($str) : '';
  }
6354
6355
  /**
6356
   * alias for "UTF8::to_iso8859()"
6357
   *
6358
   * @see UTF8::to_iso8859()
6359
   *
6360
   * @param string|string[] $str
6361
   *
6362
   * @return string|string[]
6363
   */
6364
  public static function to_latin1($str)
  {
    // alias: the argument is forwarded unchanged to "to_iso8859()"
    $converted = self::to_iso8859($str);

    return $converted;
  }
6368
6369
  /**
6370
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
6371
   *
6372
   * <ul>
6373
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
6374
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
6375
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
6376
   * case.</li>
6377
   * </ul>
6378
   *
6379
   * @param string|string[] $str                    <p>Any string or array.</p>
6380
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
6381
   *
6382
   * @return string|string[] <p>The UTF-8 encoded string.</p>
6383
   */
6384
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted entry by entry (recursively), the keys are kept
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // a lone continuation byte (10xxxxxx) needs conversion,
        // everything else below 0xC0 is copied as it is
        if (($lead & "\xC0") === "\x80") {
          $result .= self::to_utf8_convert($lead);
        } else {
          $result .= $lead;
        }

        $pos++;
        continue;
      }

      // number of continuation bytes announced by the lead byte
      // (0 = doesn't look like UTF8, but should be converted)
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0;
      }

      // collect the continuation bytes, bytes behind the end of the string count as "\x00"
      $tail = '';
      $looksLikeUtf8 = ($tailLength > 0);
      for ($k = 1; $looksLikeUtf8 && $k <= $tailLength; $k++) {
        $next = ($pos + $k >= $byteCount) ? "\x00" : $str[$pos + $k];

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
        } else {
          $tail .= $next;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $result .= $lead . $tail;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert the lead byte only
        $result .= self::to_utf8_convert($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
6477
6478
  /**
   * Converts one byte that is not part of a valid UTF-8 sequence into UTF-8.
   *
   * The byte is looked up in the Windows-1252 table first; only when it has no
   * entry there is it encoded as a generic two-byte sequence.
   *
   * @param string $int <p>A single byte. Despite the historical parameter name this is a one-byte string,
   *                    not an integer.</p>
   *
   * @return string <p>The replacement byte sequence.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Shift instead of "/ 64": the division produces a float whenever the ordinal is
    // not a multiple of 64, and a fractional float used as an array key is an
    // implicit lossy conversion (deprecated since PHP 8.1). The shift gives the same
    // truncated value as an integer.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6511
6512
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the native function if the string / chars were 7-bit only,
   * but the check for ASCII (7-bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string. If the regular expression engine fails (e.g. on malformed UTF-8)
   *                the input is returned unchanged.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      // preg_replace() returns null when PCRE fails, e.g. when $str is not valid UTF-8
      // under the "u" modifier. Returning that null would violate the declared string
      // return type and raise a TypeError, so fall back to the untouched input.
      return \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str) ?? $str;
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6538
6539
  /**
   * Upper-cases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // "\mb_strpos" and "\iconv_strpos" return a wrong position
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything after the first character; substr() may report failure with false
    $remainder = self::substr($str, 1, null, $encoding);

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $upperFirstChar = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $upperFirstChar . ($remainder === false ? '' : $remainder);
  }
6569
6570
  /**
   * Alias for "UTF8::ucfirst()": upper-cases the first character of a single word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The word to convert.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $converted = self::ucfirst($word, $encoding, $cleanUtf8);

    return $converted;
  }
6585
6586
  /**
   * Uppercase the first character of every word in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a truthiness test would also reject the string "0".
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE does not only uppercase the first letter, it also lowercases all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // The native function is only usable when neither a charlist nor any non-empty
    // exception was given (again compared explicitly, so that "0" counts as a value).
    $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

    if ($usePhpDefaultFunctions === true && self::is_ascii($str) === true) {
      return \ucwords($str);
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty pieces only; the word "0" must be kept in the output
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6654
6655
  /**
   * Decodes url-encoded text and html entities (repeatedly, if requested) and repairs
   * urlencoded win1252 characters along the way.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html entities so the loop below decodes them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // another pass is only useful while the last one still changed something
      $changed = $previous !== $str;

    } while ($multi_decode === true && $changed);

    return $str;
  }
6703
6704
  /**
   * Returns a map from url-encoded Windows-1252 bytes ("%20" ... "%FF") to their UTF-8 characters.
   *
   * The bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no assignment in Windows-1252 and
   * map to an empty string. Invisible characters (DEL, no-break space, soft hyphen)
   * are written as escape sequences so that they survive editors and copy & paste.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>url-encoded byte => UTF-8 character</p>
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DEL
        '%80' => '€', // 0x80 is the euro sign in Windows-1252 (was wrongly mapped to a backtick)
        '%81' => '', // unassigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // unassigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // unassigned in Windows-1252
        '%90' => '', // unassigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // unassigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\u{A0}", // no-break space
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\u{AD}", // soft hyphen
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6940
6941
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Two-byte sequences whose code point is below 256 become a single byte; every
   * other multi-byte sequence is replaced by "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars [optional] <p>If true and decoding did not make the string shorter
   *                              (measured with self::strlen()), the string as it was before the
   *                              byte-wise decoding is returned instead of the decoded one.</p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    // build the search / replace lists once per request
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position; the string is rewritten in place
    // and $j never runs ahead of $i.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a two-byte sequence: 5 payload bits here, 6 in the next byte
          $c = self::$ORD[$str[$i] & "\x1F"] << 6;
          ++$i;

          // A lead byte at the very end of the string has no continuation byte.
          // Reading it anyway triggers an uninitialized-offset and an undefined-index
          // diagnostic; treating the missing bits as 0 gives the same value without them.
          if (isset($str[$i])) {
            $c |= self::$ORD[$str[$i] & "\x3F"];
          }

          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // four-byte sequence: skip one byte more than the three-byte case
          ++$i;
          // no break - intentional fall-through
        case "\xE0":
          // three-byte sequence: not representable, skip the two remaining bytes
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // single byte: copy as is
          $str[$j] = $str[$i];
      }
    }

    // cut off the bytes left over behind the write position
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7022
7023
  /**
   * Encodes an ISO-8859-1 string to UTF-8 and then applies the replacement table
   * built from the "win1252_to_utf8" data.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill may return false
    if ($encoded === false) {
      return '';
    }

    $str = (string)$encoded;

    // without a "\xC2" byte there is nothing for the replacement table to fix
    if (\strpos($str, "\xC2") === false) {
      return $str;
    }

    static $searchCache = null;
    static $replaceCache = null;

    // build the search / replace lists once per request
    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
7063
7064
  /**
   * Fixes utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7077
7078
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7094
7095
  /**
   * Cuts a string after a maximum number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // nothing to return for an empty string or a non-positive limit
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // leading whitespace, then up to $limit runs of "word + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    if (!isset($matches[0])) {
      return $str;
    }

    // the match covers the whole string: nothing was cut off, so no add-on
    if (self::strlen($str) === self::strlen($matches[0])) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
7126
7127
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is first turned into a one-byte-per-character mask, the native
   * wordwrap() is run on that mask, and the break positions found there are then
   * applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $mask mirrors $chars entry by entry: '#' = existing break, ' ' = space, '?' = any other character
    $mask = '';
    $chars = [];

    foreach (\explode($break, $str) as $segmentIndex => $segment) {

      // every segment but the first was preceded by a break in the input
      if ($segmentIndex) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;
    $mask = \wordwrap($mask, $width, '#', $cut);

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {

      // copy all characters in front of the break position
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break replaces an original break or a space, so drop that character
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . \implode('', $chars);
  }
7191
7192
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7201
7202
}
7203