Completed
Push — master ( 473391...0cea8d )
by Lars
13:26
created

UTF8::is_json()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 16
ccs 7
cts 7
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 10
nc 4
nop 1
crap 4
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
206
   * __construct()
207
   */
208 16
  public function __construct()
  {
    // Creating an instance triggers the environment detection; the detection
    // itself is guarded, so repeated instantiation does not repeat the work.
    self::checkForSupport();
  }
212
213
  /**
214
   * Return the character at the specified position: $str[1] like functionality.
215
   *
216
   * @param string $str <p>A UTF-8 string.</p>
217
   * @param int    $pos <p>The position of character to return.</p>
218
   *
219
   * @return string <p>Single Multi-Byte character.</p>
220
   */
221 3
  public static function access(string $str, int $pos): string
  {
    // Neither an empty string nor a negative position can yield a character.
    if ($pos < 0 || !isset($str[0])) {
      return '';
    }

    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233
234
  /**
235
   * Prepends UTF-8 BOM character to the string and returns the whole string.
236
   *
237
   * INFO: If BOM already existed there, the Input string is returned.
238
   *
239
   * @param string $str <p>The input string.</p>
240
   *
241
   * @return string <p>The output string that contains BOM.</p>
242
   */
243 1
  public static function add_bom_to_string(string $str): string
  {
    // A BOM is already present: hand the input back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
253
   * Changes all keys in an array.
254
   *
255
   * @param array $array <p>The array to work on</p>
256
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
257
   *                     or <strong>CASE_LOWER</strong> (default)</p>
258
   *
259
   * @return array <p>An array with its keys lower or uppercased.</p>
260
   */
261 1
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Every value other than CASE_UPPER (unknown values included) means lower-casing.
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $originalKey => $item) {
      $newKey = $toUpper
          ? self::strtoupper($originalKey)
          : self::strtolower($originalKey);

      // keys that collide after the case change overwrite earlier entries
      $converted[$newKey] = $item;
    }

    return $converted;
  }
284
285
  /**
286
   * Convert binary into a string.
287
   *
288
   * @param mixed $bin 1|0
289
   *
290
   * @return string
291
   */
292 1
  public static function binary_to_str($bin): string
  {
    // Nothing to convert (empty string, or a value without a first offset).
    if (!isset($bin[0])) {
      return '';
    }

    // Keep only binary digits and drop leading zeros. base_convert(), which was
    // used here before, ignored foreign characters and leading zeros as well,
    // so an all-zero input still results in an empty string.
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      return '';
    }

    // Left-pad to whole bytes. Without this, a value such as "1010" (0x0A) was
    // packed as the single hex digit "a", which pack('H*') reads as the HIGH
    // nibble and turns into "\xa0".
    $missingBits = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missingBits) . $bits;

    // Convert byte by byte, so that long inputs do not lose precision
    // (base_convert() works with floats internally).
    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
305
306
  /**
307
   * Returns the UTF-8 Byte Order Mark Character.
308
   *
309
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
310
   *
311
   * @return string UTF-8 Byte Order Mark
312
   */
313 2
  public static function bom(): string
  {
    // The three bytes EF BB BF, i.e. the same sequence that is listed as
    // "UTF-8 BOM" in the BOM table of this class.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
317
318
  /**
319
   * @alias of UTF8::chr_map()
320
   *
321
   * @see   UTF8::chr_map()
322
   *
323
   * @param string|array $callback
324
   * @param string       $str
325
   *
326
   * @return array
327
   */
328 1
  public static function callback($callback, string $str): array
  {
    // Pure alias: the arguments are forwarded to chr_map() unchanged.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
332
333
  /**
334
   * This method will auto-detect your server environment for UTF-8 support.
335
   *
336
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
337
   */
338 19
  public static function checkForSupport()
  {
    // The result is cached in self::$SUPPORT, so only the first call does any work.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // The list of transliterator ids stays empty unless "intl" is loaded
    // and actually offers transliterator_list_ids().
    $transliteratorIds = [];
    if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
      $transliteratorIds = transliterator_list_ids();
    }
    self::$SUPPORT['intl__transliterator_list_ids'] = $transliteratorIds;

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
369
370
  /**
371
   * Generates a UTF-8 encoded character from the given code point.
372
   *
373
   * INFO: opposite to UTF8::ord()
374
   *
375
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
376
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
377
   *
378
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
379
   */
380 10
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init: per-request cache, keyed by code point and target encoding
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Only numbers inside the Unicode range are code points. Everything else
    // (empty input, non-numeric strings, negative or too large values) is a
    // failure, which is reported with null as documented for this method.
    if (\is_numeric($code_point) === false) {
      return null;
    }

    // Work with a real integer from here on: IntlChar::chr() interprets a
    // string argument as a character and not as a number, so numeric strings
    // such as "948" must not be passed through unchanged.
    $code_point = (int)$code_point;
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    // The separator keeps code point and encoding name apart in the cache key.
    $cacheKey = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $chr = \IntlChar::chr($code_point);

      // nothing to convert or to cache if "IntlChar" rejected the code point
      if ($chr === null) {
        return null;
      }

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    // fallback: build the UTF-8 byte sequence by hand via the byte lookup table
    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    // (code points <= 0x7F never get here, they were handled above)
    if ($code_point <= 0x7FF) {
      // 110xxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
459
460
  /**
461
   * Applies callback to all characters of a string.
462
   *
463
   * @param string|array $callback <p>The callback function.</p>
464
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
465
   *
466
   * @return array <p>The outcome of callback.</p>
467
   */
468 1
  public static function chr_map($callback, string $str): array
  {
    // Break the string into its characters and feed each one to the callback.
    return \array_map($callback, self::split($str));
  }
474
475
  /**
476
   * Generates an array of byte length of each character of a Unicode string.
477
   *
478
   * 1 byte => U+0000  - U+007F
479
   * 2 byte => U+0080  - U+07FF
480
   * 3 byte => U+0800  - U+FFFF
481
   * 4 byte => U+10000 - U+10FFFF
482
   *
483
   * @param string $str <p>The original unicode string.</p>
484
   *
485
   * @return array <p>An array of byte lengths of each character.</p>
486
   */
487 4
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // Measure every character in bytes ("8BIT"), keeping the keys of the split result.
    $sizes = [];
    foreach (self::split($str) as $index => $character) {
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
500
501
  /**
502
   * Get a decimal code representation of a specific character.
503
   *
504
   * @param string $char <p>The input character.</p>
505
   *
506
   * @return int
507
   */
508 2
  public static function chr_to_decimal(string $char): int
  {
    // Empty input: there is no first byte to inspect. Returning 0 right away
    // avoids reading an undefined string offset.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // The lead byte announces the length of the sequence; strip its marker bits.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // A truncated sequence has no byte at this position: it contributes
      // zero bits instead of triggering an "uninitialized string offset".
      $payload = isset($char[$i - 1]) ? (self::ord($char[$i - 1]) & ~0x80) : 0;
      $code = ($code << 6) + $payload;
    }

    return $code;
  }
539
540
  /**
541
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
542
   *
543
   * @param string $char <p>The input character</p>
544
   * @param string $pfix [optional]
545
   *
546
   * @return string <p>The code point encoded as U+xxxx</p>
547
   */
548 1
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // The HTML entity of the NUL byte is handled like an empty character.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
560
561
  /**
562
   * alias for "UTF8::chr_to_decimal()"
563
   *
564
   * @see UTF8::chr_to_decimal()
565
   *
566
   * @param string $chr
567
   *
568
   * @return int
569
   */
570 1
  public static function chr_to_int(string $chr): int
  {
    // Pure alias: the work is done by chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
574
575
  /**
576
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
577
   *
578
   * @param string $body     <p>The original string to be split.</p>
579
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
580
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
581
   *
582
   * @return string <p>The chunked string</p>
583
   */
584 1
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    // Cut the string into pieces of $chunklen characters ...
    $chunks = self::split($body, $chunklen);

    // ... and glue them together again, with $end between the pieces.
    return \implode($end, $chunks);
  }
588
589
  /**
590
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
591
   *
592
   * @param string $str                           <p>The string to be sanitized.</p>
593
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
594
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
595
   *                                              whitespace.</p>
596
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
597
   *                                              e.g.: "…"
598
   *                                              => "..."</p>
599
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
600
   *                                              combination with
601
   *                                              $normalize_whitespace</p>
602
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
603
   *                                              mark e.g.: "�"</p>
604
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
605
   *                                              characters e.g.: "\0"</p>
606
   *
607
   * @return string <p>Clean UTF-8 encoded string.</p>
608
   */
609 64
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed byte sequences, the other two
    // alternatives match stray bytes. Replacing every match with "$1" keeps
    // the well-formed runs and drops the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($utf8Pattern, '$1', $str);

    // The optional steps run in a fixed order; each one works on the result of the previous one.
    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
649
650
  /**
651
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
652
   *
653
   * @param string $str <p>The input string.</p>
654
   *
655
   * @return string
656
   */
657 24
  public static function cleanup(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($str);

    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (MS Word chars are not normalized)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
683
684
  /**
685
   * Accepts a string or a array of strings and returns an array of Unicode code points.
686
   *
687
   * INFO: opposite to UTF8::string()
688
   *
689
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
690
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
691
   *                                    default, code points will be returned as integers.</p>
692
   *
693
   * @return array <p>The array of code points.</p>
694
   */
695 7
  public static function codepoints($arg, bool $u_style = false): array
  {
    // A plain string is split into its characters, an array is used as it is.
    $chars = (\is_string($arg) === true) ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if (!$u_style) {
      return $codePoints;
    }

    // U+xxxx notation was requested: convert every integer code point.
    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
721
722
  /**
723
   * Returns count of characters used in a string.
724
   *
725
   * @param string $str       <p>The input string.</p>
726
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
727
   *
728
   * @return array <p>An associative array of Character as keys and
729
   *               their count as values.</p>
730
   */
731 9
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    // One array element per character ...
    $characters = self::split($str, 1, $cleanUtf8);

    // ... then count how often each of them occurs.
    return \array_count_values($characters);
  }
735
736
  /**
737
   * Converts a int-value into an UTF-8 character.
738
   *
739
   * @param mixed $int
740
   *
741
   * @return string
742
   */
743 5
  public static function decimal_to_chr($int): string
  {
    // Build the numeric HTML entity for the value and let the decoder resolve it.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
747
748
  /**
749
   * Encode a string with a new charset-encoding.
750
   *
751
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
752
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
753
   *
754
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
755
   * @param string $str      <p>The input string</p>
756
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
757
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
758
   *
759
   * @return string
760
   */
761 14
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // Nothing to convert, or no target encoding was given.
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $sourceEncoding = self::str_detect_encoding($str);

    // Leave the input alone if the detection failed, or if the input already
    // has the target encoding and no conversion is forced.
    if (
        $sourceEncoding === false
        ||
        ($force !== true && $sourceEncoding === $encoding)
    ) {
      return $str;
    }

    // The dedicated UTF-8 / ISO-8859-1 converters are used when forced, or
    // when the detected source encoding is one of these three.
    $useDedicatedConverter = $force === true
                             || \in_array($sourceEncoding, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($useDedicatedConverter && $encoding === 'UTF-8') {
      return self::to_utf8($str);
    }

    if ($useDedicatedConverter && $encoding === 'ISO-8859-1') {
      return self::to_iso8859($str);
    }

    $mbstringMissing = self::$SUPPORT['mbstring'] === false;
    if ($mbstringMissing && $encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252') {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $sourceEncoding);

    // Fall back to the unchanged input if the conversion produced nothing usable.
    return $converted ? $converted : $str;
  }
836
837
  /**
838
   * Reads entire file into a string.
839
   *
840
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
841
   *
842
   * @link http://php.net/manual/en/function.file-get-contents.php
843
   *
844
   * @param string        $filename         <p>
845
   *                                        Name of the file to read.
846
   *                                        </p>
847
   * @param bool          $use_include_path [optional] <p>
848
   *                                        Prior to PHP 5, this parameter is called
849
   *                                        use_include_path and is a bool.
850
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
851
   *                                        to trigger include path
852
   *                                        search.
853
   *                                        </p>
854
   * @param resource|null $context          [optional] <p>
855
   *                                        A valid context resource created with
856
   *                                        stream_context_create. If you don't need to use a
857
   *                                        custom context, you can skip this parameter by &null;.
858
   *                                        </p>
859
   * @param int|null      $offset           [optional] <p>
860
   *                                        The offset where the reading starts.
861
   *                                        </p>
862
   * @param int|null      $maxLength        [optional] <p>
863
   *                                        Maximum length of data read. The default is to read until end
864
   *                                        of file is reached.
865
   *                                        </p>
866
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
867
   *
868
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
869
   *                                        images or pdf, because they used non default utf-8 chars.</p>
870
   *
871
   * @return string|false <p>The function returns the read data or false on failure.</p>
872
   */
873 6
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failed filter with false, which is not a usable
    // file name (and would be rejected by \file_get_contents() under strict_types)
    if ($filename === false) {
      return false;
    }

    // Without a caller-supplied context, create one that carries the timeout
    // (the option is set for the "http" wrapper only).
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed on when a length was requested,
    // otherwise the file is read until its end.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // Binary data that is neither UTF-16 nor UTF-32 is returned untouched,
      // everything else is converted to UTF-8 and cleaned up.
      $isPlainBinary = self::is_binary($data, true) === true
                       &&
                       self::is_utf16($data) === false
                       &&
                       self::is_utf32($data) === false;

      if ($isPlainBinary === false) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
923
924
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // file_get_contents() yields false on failure; false is not a string and
    // must not be handed on to string_has_bom().
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
935
936
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property);
   * values that are neither array, object nor string are returned unchanged.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>One of the \Normalizer form constants, default is \Normalizer::NFC.
   *                                   The constant is used instead of a literal number because its numeric
   *                                   value depends on the installed intl / ICU version.</p>
   * @param string $leading_combining  <p>Prefix that is put in front of a string which starts with a
   *                                   combining mark (\p{Mn}); an empty string disables the prefix.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
  {
    switch (\gettype($var)) {
      case 'array':
        foreach ($var as $k => $v) {
          /** @noinspection AlterInForeachInspection */
          $var[$k] = self::filter($v, $normalization_form, $leading_combining);
        }
        break;
      case 'object':
        foreach ($var as $k => $v) {
          $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
        }
        break;
      case 'string':
        if (false !== \strpos($var, "\r")) {
          // Workaround https://bugs.php.net/65732
          $var = \str_replace(["\r\n", "\r"], "\n", $var);
        }

        // Pure ASCII needs neither normalization nor re-encoding.
        if (self::is_ascii($var) === false) {
          /** @noinspection PhpUndefinedClassInspection */
          if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker: the input was already in the requested form.
            $n = '-';
          } else {
            /** @noinspection PhpUndefinedClassInspection */
            $n = \Normalizer::normalize($var, $normalization_form);

            if (isset($n[0])) {
              $var = $n;
            } else {
              // Normalization gave no usable result: re-encode the input to UTF-8 instead.
              $var = self::encode('UTF-8', $var, true);
            }
          }

          if (
              $var[0] >= "\x80"
              &&
              isset($n[0], $leading_combining[0])
              &&
              \preg_match('/^\p{Mn}/u', $var)
          ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
          }
        }

        break;
    }

    return $var;
  }
999
1000
  /**
1001
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1002
   *
1003
   * Gets a specific external variable by name and optionally filters it
1004
   *
1005
   * @link  http://php.net/manual/en/function.filter-input.php
1006
   *
1007
   * @param int    $type          <p>
1008
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1009
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1010
   *                              <b>INPUT_ENV</b>.
1011
   *                              </p>
1012
   * @param string $variable_name <p>
1013
   *                              Name of a variable to get.
1014
   *                              </p>
1015
   * @param int    $filter        [optional] <p>
1016
   *                              The ID of the filter to apply. The
1017
   *                              manual page lists the available filters.
1018
   *                              </p>
1019
   * @param mixed  $options       [optional] <p>
1020
   *                              Associative array of options or bitwise disjunction of flags. If filter
1021
   *                              accepts options, flags can be provided in "flags" field of array.
1022
   *                              </p>
1023
   *
1024
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1025
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1026
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1027
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1028
   * @since 5.2.0
1029
   */
1030 View Code Duplication
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really supplied a fourth argument,
    // so the native default of filter_input() stays in effect otherwise.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    // Normalize whatever the native function delivered.
    return self::filter($value);
  }
1040
1041
  /**
1042
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1043
   *
1044
   * Gets external variables and optionally filters them
1045
   *
1046
   * @link  http://php.net/manual/en/function.filter-input-array.php
1047
   *
1048
   * @param int   $type       <p>
1049
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1050
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1051
   *                          <b>INPUT_ENV</b>.
1052
   *                          </p>
1053
   * @param mixed $definition [optional] <p>
1054
   *                          An array defining the arguments. A valid key is a string
1055
   *                          containing a variable name and a valid value is either a filter type, or an array
1056
   *                          optionally specifying the filter, flags and options. If the value is an
1057
   *                          array, valid keys are filter which specifies the
1058
   *                          filter type,
1059
   *                          flags which specifies any flags that apply to the
1060
   *                          filter, and options which specifies any options that
1061
   *                          apply to the filter. See the example below for a better understanding.
1062
   *                          </p>
1063
   *                          <p>
1064
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1065
   *                          input array are filtered by this filter.
1066
   *                          </p>
1067
   * @param bool  $add_empty  [optional] <p>
1068
   *                          Add missing keys as <b>NULL</b> to the return value.
1069
   *                          </p>
1070
   *
1071
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1072
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1073
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1074
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1075
   * fails.
1076
   * @since 5.2.0
1077
   */
1078 View Code Duplication
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, let the native function use its own defaults;
    // otherwise pass the definition and the add-empty flag through.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    // Normalize the fetched values.
    return self::filter($values);
  }
1088
1089
  /**
1090
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1091
   *
1092
   * Filters a variable with a specified filter
1093
   *
1094
   * @link  http://php.net/manual/en/function.filter-var.php
1095
   *
1096
   * @param mixed $variable <p>
1097
   *                        Value to filter.
1098
   *                        </p>
1099
   * @param int   $filter   [optional] <p>
1100
   *                        The ID of the filter to apply. The
1101
   *                        manual page lists the available filters.
1102
   *                        </p>
1103
   * @param mixed $options  [optional] <p>
1104
   *                        Associative array of options or bitwise disjunction of flags. If filter
1105
   *                        accepts options, flags can be provided in "flags" field of array. For
1106
   *                        the "callback" filter, callable type should be passed. The
1107
   *                        callback must accept one argument, the value to be filtered, and return
1108
   *                        the value after filtering/sanitizing it.
1109
   *                        </p>
1110
   *                        <p>
1111
   *                        <code>
1112
   *                        // for filters that accept options, use this format
1113
   *                        $options = array(
1114
   *                        'options' => array(
1115
   *                        'default' => 3, // value to return if the filter fails
1116
   *                        // other options here
1117
   *                        'min_range' => 0
1118
   *                        ),
1119
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1120
   *                        );
1121
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1122
   *                        // for filter that only accept flags, you can pass them directly
1123
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1124
   *                        // for filter that only accept flags, you can also pass as an array
1125
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1126
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1127
   *                        // callback validate filter
1128
   *                        function foo($value)
1129
   *                        {
1130
   *                        // Expected format: Surname, GivenNames
1131
   *                        if (strpos($value, ", ") === false) return false;
1132
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1133
   *                        $empty = (empty($surname) || empty($givennames));
1134
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1135
   *                        if ($empty || $notstrings) {
1136
   *                        return false;
1137
   *                        } else {
1138
   *                        return $value;
1139
   *                        }
1140
   *                        }
1141
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1142
   *                        </code>
1143
   *                        </p>
1144
   *
1145
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1146
   * @since 5.2.0
1147
   */
1148 1 View Code Duplication
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when a third argument was actually passed,
    // so the native default of filter_var() stays in effect otherwise.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    // Normalize the filtered value.
    return self::filter($filtered);
  }
1158
1159
  /**
1160
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1161
   *
1162
   * Gets multiple variables and optionally filters them
1163
   *
1164
   * @link  http://php.net/manual/en/function.filter-var-array.php
1165
   *
1166
   * @param array $data       <p>
1167
   *                          An array with string keys containing the data to filter.
1168
   *                          </p>
1169
   * @param mixed $definition [optional] <p>
1170
   *                          An array defining the arguments. A valid key is a string
1171
   *                          containing a variable name and a valid value is either a
1172
   *                          filter type, or an
1173
   *                          array optionally specifying the filter, flags and options.
1174
   *                          If the value is an array, valid keys are filter
1175
   *                          which specifies the filter type,
1176
   *                          flags which specifies any flags that apply to the
1177
   *                          filter, and options which specifies any options that
1178
   *                          apply to the filter. See the example below for a better understanding.
1179
   *                          </p>
1180
   *                          <p>
1181
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1182
   *                          input array are filtered by this filter.
1183
   *                          </p>
1184
   * @param bool  $add_empty  [optional] <p>
1185
   *                          Add missing keys as <b>NULL</b> to the return value.
1186
   *                          </p>
1187
   *
1188
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1189
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1190
   * the variable is not set.
1191
   * @since 5.2.0
1192
   */
1193 1 View Code Duplication
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, let the native function use its own defaults;
    // otherwise pass the definition and the add-empty flag through.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    // Normalize the filtered values.
    return self::filter($values);
  }
1203
1204
  /**
   * Tell whether a string is at most $box_size characters long.
   *
   * The length is determined via UTF8::strlen().
   *
   * @param string $str      <p>The string to measure.</p>
   * @param int    $box_size <p>The maximum number of characters allowed.</p>
   *
   * @return bool <p>true if the length is less than or equal to $box_size, false otherwise.</p>
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1216
1217
  /**
   * Repair simple cases of broken UTF-8 by plain search & replace.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Intended for UTF-8 strings that were converted from Windows-1252 as if they were ISO-8859-1
   * (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of the "utf8_fix" table replaced by its value.</p>
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search and replacement lists, built on first use and kept for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
      // Load the replacement table lazily.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $searchList = \array_keys(self::$BROKEN_UTF8_FIX);
      $replaceList = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($searchList, $replaceList, $str);
  }
1251
1252
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * A string is decoded and re-encoded repeatedly until the result no longer changes;
   * arrays are processed element by element (recursively), keeping their keys.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::fix_utf8($item);
          },
          $str
      );
    }

    // Iterate until a fixed point is reached.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous, true));
    }

    return $str;
  }
1280
1281
  /**
   * Get the text direction of a specific character.
   *
   * @param string $char <p>The character to inspect.</p>
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      // Direction ids from the "IntlChar"-Class, grouped by the direction reported for them.
      $ltrIds = [0, 11, 12, 20];
      $rtlIds = [1, 13, 14, 15, 21];

      $directionId = \IntlChar::charDirection($char);

      if (\in_array($directionId, $ltrIds, true)) {
        return 'LTR';
      }

      if (\in_array($directionId, $rtlIds, true)) {
        return 'RTL';
      }

      // Any other id: fall through to the code point table below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the range covered by the table is treated as LTR.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Right-to-left code points: a plain int is one code point,
    // a pair is an inclusive [first, last] range.
    $rtlTable = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        [0x5d0, 0x5ea], [0x5f0, 0x5f4],
        0x608, 0x60b, 0x60d, 0x61b,
        [0x61e, 0x64a], [0x66d, 0x66f], [0x671, 0x6d5],
        [0x6e5, 0x6e6], [0x6ee, 0x6ef], [0x6fa, 0x70d],
        0x710,
        [0x712, 0x72f], [0x74d, 0x7a5],
        0x7b1,
        [0x7c0, 0x7ea], [0x7f4, 0x7f5],
        0x7fa,
        [0x800, 0x815],
        0x81a, 0x824, 0x828,
        [0x830, 0x83e], [0x840, 0x858],
        0x85e,
        0x200f,
        0xfb1d,
        [0xfb1f, 0xfb28], [0xfb2a, 0xfb36], [0xfb38, 0xfb3c],
        0xfb3e,
        [0xfb40, 0xfb41], [0xfb43, 0xfb44], [0xfb46, 0xfbc1], [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f], [0xfd92, 0xfdc7], [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74], [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        0x10808,
        [0x1080a, 0x10835], [0x10837, 0x10838],
        0x1083c,
        [0x1083f, 0x10855], [0x10857, 0x1085f],
        [0x10900, 0x1091b], [0x10920, 0x10939],
        0x1093f, 0x10a00,
        [0x10a10, 0x10a13], [0x10a15, 0x10a17], [0x10a19, 0x10a33],
        [0x10a40, 0x10a47], [0x10a50, 0x10a58], [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35], [0x10b40, 0x10b55], [0x10b58, 0x10b72], [0x10b78, 0x10b7f],
    ];

    foreach ($rtlTable as $entry) {
      if (\is_int($entry)) {
        if ($entry === $c) {
          return 'RTL';
        }
      } elseif ($entry[0] <= $c && $entry[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1402
1403
  /**
   * Load a data table from "/data/*.php" (relative to the directory of this file).
   *
   * @param string $file <p>Base name of the data file, without directory and ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the data file; false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1420
1421
  /**
   * Report which php features were detected.
   *
   * @param string|null $key <p>Name of a single support entry, or null for all entries.</p>
   *
   * @return mixed <p>The full support-"array", if $key === null<br>
   *               the stored value, if $key is used and available<br>
   *               otherwise null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    // Run the detection on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown keys yield null.
    return self::$SUPPORT[$key] ?? null;
  }
1446
1447
  /**
   * Alias of "UTF8::string_has_bom()", kept for backward compatibility.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>The result of UTF8::string_has_bom() for $str.</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1462
1463
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with hexdec() and then passed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1474
1475
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted are 4 to 6 hexadecimal digits (case-insensitive), optionally prefixed
   * with "\u" or "U+". Anything else, including letters outside of a-f, is rejected.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hex digits may be captured: intval(..., 16) stops silently at the first
    // non-hex character, so e.g. "zzzz" would otherwise be reported as code point 0.
    if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1496
1497
  /**
   * Alias of "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Passed through to UTF8::html_entity_decode().</p>
   * @param string $encoding [optional] <p>Passed through to UTF8::html_entity_decode(), default is UTF-8.</p>
   *
   * @return string <p>The result of UTF8::html_entity_decode().</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1512
1513
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // First code point that gets encoded: 0x80 leaves plain ASCII untouched.
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // A convmap consists of groups of exactly four integers
      // (start, end, offset, mask); PHP 8 rejects any other element count.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    // Fallback without mbstring: encode character by character.
    return \implode(
        '',
        \array_map(
            static function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1559
1560
  /**
1561
   * UTF-8 version of html_entity_decode()
1562
   *
1563
   * The reason we are not using html_entity_decode() by itself is because
1564
   * while it is not technically correct to leave out the semicolon
1565
   * at the end of an entity most browsers will still interpret the entity
1566
   * correctly. html_entity_decode() does not convert entities without
1567
   * semicolons, so we are left with our own little solution here. Bummer.
1568
   *
1569
   * Convert all HTML entities to their applicable characters
1570
   *
1571
   * INFO: opposite to UTF8::html_encode()
1572
   *
1573
   * @link http://php.net/manual/en/function.html-entity-decode.php
1574
   *
1575
   * @param string $str      <p>
1576
   *                         The input string.
1577
   *                         </p>
1578
   * @param int    $flags    [optional] <p>
1579
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1580
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1581
   *                         <table>
1582
   *                         Available <i>flags</i> constants
1583
   *                         <tr valign="top">
1584
   *                         <td>Constant Name</td>
1585
   *                         <td>Description</td>
1586
   *                         </tr>
1587
   *                         <tr valign="top">
1588
   *                         <td><b>ENT_COMPAT</b></td>
1589
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1590
   *                         </tr>
1591
   *                         <tr valign="top">
1592
   *                         <td><b>ENT_QUOTES</b></td>
1593
   *                         <td>Will convert both double and single quotes.</td>
1594
   *                         </tr>
1595
   *                         <tr valign="top">
1596
   *                         <td><b>ENT_NOQUOTES</b></td>
1597
   *                         <td>Will leave both double and single quotes unconverted.</td>
1598
   *                         </tr>
1599
   *                         <tr valign="top">
1600
   *                         <td><b>ENT_HTML401</b></td>
1601
   *                         <td>
1602
   *                         Handle code as HTML 4.01.
1603
   *                         </td>
1604
   *                         </tr>
1605
   *                         <tr valign="top">
1606
   *                         <td><b>ENT_XML1</b></td>
1607
   *                         <td>
1608
   *                         Handle code as XML 1.
1609
   *                         </td>
1610
   *                         </tr>
1611
   *                         <tr valign="top">
1612
   *                         <td><b>ENT_XHTML</b></td>
1613
   *                         <td>
1614
   *                         Handle code as XHTML.
1615
   *                         </td>
1616
   *                         </tr>
1617
   *                         <tr valign="top">
1618
   *                         <td><b>ENT_HTML5</b></td>
1619
   *                         <td>
1620
   *                         Handle code as HTML 5.
1621
   *                         </td>
1622
   *                         </tr>
1623
   *                         </table>
1624
   *                         </p>
1625
   * @param string $encoding [optional] <p>Encoding to use.</p>
1626
   *
1627
   * @return string <p>The decoded string.</p>
1628
   */
1629 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // An empty input decodes to an empty string.
    if (!isset($str[0])) {
      return '';
    }

    // Inputs shorter than four bytes (examples: &; || &x;) are returned untouched.
    if (!isset($str[3])) {
      return $str;
    }

    // Fast exit: without an ampersand there is nothing to decode ...
    if (\strpos($str, '&') === false) {
      return $str;
    }

    // ... and the same holds when neither a numeric-entity prefix nor a semicolon is present.
    if (\strpos($str, '&#') === false && \strpos($str, ';') === false) {
      return $str;
    }

    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // When no flags were passed, fall back to ENT_QUOTES | ENT_HTML5.
    $flags = $flags ?? (ENT_QUOTES | ENT_HTML5);

    $isUtf8OrWindows1252 = ($encoding === 'UTF-8' || $encoding === 'WINDOWS-1252');
    if (!$isUtf8OrWindows1252 && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Converts one decimal entity (e.g. "&#8364;") into the target encoding,
    // but keeps entities that would decode to a quote character as they are.
    $decodeDecimalEntity = static function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // Repeat until a full pass changes nothing, so nested / double-encoded entities are resolved too.
    do {
      $previousPass = $str;

      $str = (string)\preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // Append the missing ";" to numeric entities (decimal and hex) so that PHP can decode them.
      $withSemicolons = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);

      $str = \html_entity_decode($withSemicolons, $flags, $encoding);
    } while ($previousPass !== $str);

    return $str;
  }
1697
1698
  /**
1699
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1700
   *
1701
   * @link http://php.net/manual/en/function.htmlentities.php
1702
   *
1703
   * @param string $str           <p>
1704
   *                              The input string.
1705
   *                              </p>
1706
   * @param int    $flags         [optional] <p>
1707
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1708
   *                              invalid code unit sequences and the used document type. The default is
1709
   *                              ENT_COMPAT | ENT_HTML401.
1710
   *                              <table>
1711
   *                              Available <i>flags</i> constants
1712
   *                              <tr valign="top">
1713
   *                              <td>Constant Name</td>
1714
   *                              <td>Description</td>
1715
   *                              </tr>
1716
   *                              <tr valign="top">
1717
   *                              <td><b>ENT_COMPAT</b></td>
1718
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1719
   *                              </tr>
1720
   *                              <tr valign="top">
1721
   *                              <td><b>ENT_QUOTES</b></td>
1722
   *                              <td>Will convert both double and single quotes.</td>
1723
   *                              </tr>
1724
   *                              <tr valign="top">
1725
   *                              <td><b>ENT_NOQUOTES</b></td>
1726
   *                              <td>Will leave both double and single quotes unconverted.</td>
1727
   *                              </tr>
1728
   *                              <tr valign="top">
1729
   *                              <td><b>ENT_IGNORE</b></td>
1730
   *                              <td>
1731
   *                              Silently discard invalid code unit sequences instead of returning
1732
   *                              an empty string. Using this flag is discouraged as it
1733
   *                              may have security implications.
1734
   *                              </td>
1735
   *                              </tr>
1736
   *                              <tr valign="top">
1737
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1738
   *                              <td>
1739
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1740
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1741
   *                              </td>
1742
   *                              </tr>
1743
   *                              <tr valign="top">
1744
   *                              <td><b>ENT_DISALLOWED</b></td>
1745
   *                              <td>
1746
   *                              Replace invalid code points for the given document type with a
1747
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1748
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1749
   *                              instance, to ensure the well-formedness of XML documents with
1750
   *                              embedded external content.
1751
   *                              </td>
1752
   *                              </tr>
1753
   *                              <tr valign="top">
1754
   *                              <td><b>ENT_HTML401</b></td>
1755
   *                              <td>
1756
   *                              Handle code as HTML 4.01.
1757
   *                              </td>
1758
   *                              </tr>
1759
   *                              <tr valign="top">
1760
   *                              <td><b>ENT_XML1</b></td>
1761
   *                              <td>
1762
   *                              Handle code as XML 1.
1763
   *                              </td>
1764
   *                              </tr>
1765
   *                              <tr valign="top">
1766
   *                              <td><b>ENT_XHTML</b></td>
1767
   *                              <td>
1768
   *                              Handle code as XHTML.
1769
   *                              </td>
1770
   *                              </tr>
1771
   *                              <tr valign="top">
1772
   *                              <td><b>ENT_HTML5</b></td>
1773
   *                              <td>
1774
   *                              Handle code as HTML 5.
1775
   *                              </td>
1776
   *                              </tr>
1777
   *                              </table>
1778
   *                              </p>
1779
   * @param string $encoding      [optional] <p>
1780
   *                              Like <b>htmlspecialchars</b>,
1781
   *                              <b>htmlentities</b> takes an optional third argument
1782
   *                              <i>encoding</i> which defines encoding used in
1783
   *                              conversion.
1784
   *                              Although this argument is technically optional, you are highly
1785
   *                              encouraged to specify the correct value for your code.
1786
   *                              </p>
1787
   * @param bool   $double_encode [optional] <p>
1788
   *                              When <i>double_encode</i> is turned off PHP will not
1789
   *                              encode existing html entities. The default is to convert everything.
1790
   *                              </p>
1791
   *
1792
   *
1793
   * @return string the encoded string.
1794
   * </p>
1795
   * <p>
1796
   * If the input <i>string</i> contains an invalid code unit
1797
   * sequence within the given <i>encoding</i> an empty string
1798
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1799
   * <b>ENT_SUBSTITUTE</b> flags are set.
1800
   */
1801 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // PHP's htmlentities() leaves backslashes alone, so they are turned into
    // their numeric entity here right after the native conversion.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra per-character pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes together with its entity.
    $needles = [];
    $entities = [];
    foreach (self::chr_size_list($encoded) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return \str_replace($needles, $entities, $encoded);
  }
1839
1840
  /**
1841
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1842
   *
1843
   * INFO: Take a look at "UTF8::htmlentities()"
1844
   *
1845
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1846
   *
1847
   * @param string $str           <p>
1848
   *                              The string being converted.
1849
   *                              </p>
1850
   * @param int    $flags         [optional] <p>
1851
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1852
   *                              invalid code unit sequences and the used document type. The default is
1853
   *                              ENT_COMPAT | ENT_HTML401.
1854
   *                              <table>
1855
   *                              Available <i>flags</i> constants
1856
   *                              <tr valign="top">
1857
   *                              <td>Constant Name</td>
1858
   *                              <td>Description</td>
1859
   *                              </tr>
1860
   *                              <tr valign="top">
1861
   *                              <td><b>ENT_COMPAT</b></td>
1862
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1863
   *                              </tr>
1864
   *                              <tr valign="top">
1865
   *                              <td><b>ENT_QUOTES</b></td>
1866
   *                              <td>Will convert both double and single quotes.</td>
1867
   *                              </tr>
1868
   *                              <tr valign="top">
1869
   *                              <td><b>ENT_NOQUOTES</b></td>
1870
   *                              <td>Will leave both double and single quotes unconverted.</td>
1871
   *                              </tr>
1872
   *                              <tr valign="top">
1873
   *                              <td><b>ENT_IGNORE</b></td>
1874
   *                              <td>
1875
   *                              Silently discard invalid code unit sequences instead of returning
1876
   *                              an empty string. Using this flag is discouraged as it
1877
   *                              may have security implications.
1878
   *                              </td>
1879
   *                              </tr>
1880
   *                              <tr valign="top">
1881
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1882
   *                              <td>
1883
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1884
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1885
   *                              </td>
1886
   *                              </tr>
1887
   *                              <tr valign="top">
1888
   *                              <td><b>ENT_DISALLOWED</b></td>
1889
   *                              <td>
1890
   *                              Replace invalid code points for the given document type with a
1891
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1892
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1893
   *                              instance, to ensure the well-formedness of XML documents with
1894
   *                              embedded external content.
1895
   *                              </td>
1896
   *                              </tr>
1897
   *                              <tr valign="top">
1898
   *                              <td><b>ENT_HTML401</b></td>
1899
   *                              <td>
1900
   *                              Handle code as HTML 4.01.
1901
   *                              </td>
1902
   *                              </tr>
1903
   *                              <tr valign="top">
1904
   *                              <td><b>ENT_XML1</b></td>
1905
   *                              <td>
1906
   *                              Handle code as XML 1.
1907
   *                              </td>
1908
   *                              </tr>
1909
   *                              <tr valign="top">
1910
   *                              <td><b>ENT_XHTML</b></td>
1911
   *                              <td>
1912
   *                              Handle code as XHTML.
1913
   *                              </td>
1914
   *                              </tr>
1915
   *                              <tr valign="top">
1916
   *                              <td><b>ENT_HTML5</b></td>
1917
   *                              <td>
1918
   *                              Handle code as HTML 5.
1919
   *                              </td>
1920
   *                              </tr>
1921
   *                              </table>
1922
   *                              </p>
1923
   * @param string $encoding      [optional] <p>
1924
   *                              Defines encoding used in conversion.
1925
   *                              </p>
1926
   *                              <p>
1927
   *                              For the purposes of this function, the encodings
1928
   *                              ISO-8859-1, ISO-8859-15,
1929
   *                              UTF-8, cp866,
1930
   *                              cp1251, cp1252, and
1931
   *                              KOI8-R are effectively equivalent, provided the
1932
   *                              <i>string</i> itself is valid for the encoding, as
1933
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1934
   *                              the same positions in all of these encodings.
1935
   *                              </p>
1936
   * @param bool   $double_encode [optional] <p>
1937
   *                              When <i>double_encode</i> is turned off PHP will not
1938
   *                              encode existing html entities, the default is to convert everything.
1939
   *                              </p>
1940
   *
1941
   * @return string The converted string.
1942
   * </p>
1943
   * <p>
1944
   * If the input <i>string</i> contains an invalid code unit
1945
   * sequence within the given <i>encoding</i> an empty string
1946
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1947
   * <b>ENT_SUBSTITUTE</b> flags are set.
1948
   */
1949 1
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // Only non-default encodings are normalized; "UTF-8" is passed straight through.
    $targetEncoding = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return \htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
1957
1958
  /**
1959
   * Checks whether iconv is available on the server.
1960
   *
1961
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1962
   */
1963 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean, so it can be returned as-is.
    $isLoaded = \extension_loaded('iconv');

    return $isLoaded;
  }
1967
1968
  /**
1969
   * alias for "UTF8::decimal_to_chr()"
1970
   *
1971
   * @see UTF8::decimal_to_chr()
1972
   *
1973
   * @param mixed $int
1974
   *
1975
   * @return string
1976
   */
1977 2
  public static function int_to_chr($int): string
  {
    // Pure alias: the argument is forwarded unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
1981
1982
  /**
1983
   * Converts Integer to hexadecimal U+xxxx code point representation.
1984
   *
1985
   * INFO: opposite to UTF8::hex_to_int()
1986
   *
1987
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
1988
   * @param string $pfix [optional]
1989
   *
1990
   * @return string <p>The prefixed hexadecimal code point, zero-padded to at least four digits.</p>
1991
   */
1992 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Left-pad the hexadecimal digits with zeros up to a minimum width of four;
    // longer values are kept at their full length.
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2000
2001
  /**
2002
   * Checks whether intl-char is available on the server.
2003
   *
2004
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2005
   */
2006 1
  public static function intlChar_loaded(): bool
  {
    // Availability is detected via the presence of the "IntlChar" class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2010
2011
  /**
2012
   * Checks whether intl is available on the server.
2013
   *
2014
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2015
   */
2016 4
  public static function intl_loaded(): bool
  {
    // Availability is detected via the loaded-extension list.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2020
2021
  /**
2022
   * alias for "UTF8::is_ascii()"
2023
   *
2024
   * @see        UTF8::is_ascii()
2025
   *
2026
   * @param string $str
2027
   *
2028
   * @return boolean
2029
   *
2030
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2031
   */
2032 1
  public static function isAscii(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2036
2037
  /**
2038
   * alias for "UTF8::is_base64()"
2039
   *
2040
   * @see        UTF8::is_base64()
2041
   *
2042
   * @param string $str
2043
   *
2044
   * @return bool
2045
   *
2046
   * @deprecated <p>use "UTF8::is_base64()"</p>
2047
   */
2048 1
  public static function isBase64(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2052
2053
  /**
2054
   * alias for "UTF8::is_binary()"
2055
   *
2056
   * @see        UTF8::is_binary()
2057
   *
2058
   * @param mixed $str
2059
   * @param bool  $strict
2060
   *
2061
   * @return bool
2062
   *
2063
   * @deprecated <p>use "UTF8::is_binary()"</p>
2064
   */
2065 2
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged to is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
  }
2069
2070
  /**
2071
   * alias for "UTF8::is_bom()"
2072
   *
2073
   * @see        UTF8::is_bom()
2074
   *
2075
   * @param string $utf8_chr
2076
   *
2077
   * @return boolean
2078
   *
2079
   * @deprecated <p>use "UTF8::is_bom()"</p>
2080
   */
2081 1
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated camelCase alias: forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2085
2086
  /**
2087
   * alias for "UTF8::is_html()"
2088
   *
2089
   * @see        UTF8::is_html()
2090
   *
2091
   * @param string $str
2092
   *
2093
   * @return boolean
2094
   *
2095
   * @deprecated <p>use "UTF8::is_html()"</p>
2096
   */
2097 1
  public static function isHtml(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2101
2102
  /**
2103
   * alias for "UTF8::is_json()"
2104
   *
2105
   * @see        UTF8::is_json()
2106
   *
2107
   * @param string $str
2108
   *
2109
   * @return bool
2110
   *
2111
   * @deprecated <p>use "UTF8::is_json()"</p>
2112
   */
2113
  public static function isJson(string $str): bool
  {
    // Deprecated camelCase alias: forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2117
2118
  /**
2119
   * alias for "UTF8::is_utf16()"
2120
   *
2121
   * @see        UTF8::is_utf16()
2122
   *
2123
   * @param string $str
2124
   *
2125
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2126
   *
2127
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2128
   */
2129 1
  public static function isUtf16(string $str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2133
2134
  /**
2135
   * alias for "UTF8::is_utf32()"
2136
   *
2137
   * @see        UTF8::is_utf32()
2138
   *
2139
   * @param string $str
2140
   *
2141
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2142
   *
2143
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2144
   */
2145 1
  public static function isUtf32(string $str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2149
2150
  /**
2151
   * alias for "UTF8::is_utf8()"
2152
   *
2153
   * @see        UTF8::is_utf8()
2154
   *
2155
   * @param string $str
2156
   * @param bool   $strict
2157
   *
2158
   * @return bool
2159
   *
2160
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2161
   */
2162 16
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2166
2167
  /**
2168
   * Checks if a string is 7 bit ASCII.
2169
   *
2170
   * @param string $str <p>The string to check.</p>
2171
   *
2172
   * @return bool <p>
2173
   *              <strong>true</strong> if it is ASCII<br>
2174
   *              <strong>false</strong> otherwise
2175
   *              </p>
2176
   */
2177 58
  public static function is_ascii(string $str): bool
  {
    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Whitelist: tab, the bytes 0x10 and 0x13, LF, CR and the printable range 0x20-0x7E.
    // Any other byte (including NUL and DEL) makes the string "not ASCII".
    $foundForeignByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return $foundForeignByte !== 1;
  }
2185
2186
  /**
2187
   * Returns true if the string is base64 encoded, false otherwise.
2188
   *
2189
   * @param string $str <p>The input string.</p>
2190
   *
2191
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2192
   */
2193 1
  public static function is_base64(string $str): bool
  {
    // Strict mode: base64_decode() returns false as soon as the input contains
    // a character from outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare explicitly instead of relying on truthiness: a decoded payload of "0"
    // (encoded as "MA==") is a falsy string in PHP and was wrongly rejected before.
    // An empty input is still reported as "not base64".
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Round trip: only input that re-encodes to exactly the same string is accepted.
    return \base64_encode($decoded) === $str;
  }
2199
2200
  /**
2201
   * Check if the input is binary (heuristic; this looks like a hack).
2202
   *
2203
   * @param mixed $input
2204
   * @param bool  $strict
2205
   *
2206
   * @return bool
2207
   */
2208 19
  public static function is_binary($input, $strict = false): bool
  {
    $input = (string)$input;

    // An empty value is never reported as binary.
    if (!isset($input[0])) {
      return false;
    }

    // A string consisting solely of "0" and "1" is treated as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // More than 30% NUL bytes: binary. (The length is never zero at this point.)
    $nullBytes = \substr_count($input, "\x0");
    if (($nullBytes / \strlen($input)) > 0.3) {
      return true;
    }

    // Non-strict mode, or no finfo available: any NUL byte at all marks the input as binary.
    $useFinfo = ($strict === true && \class_exists('finfo'));
    if (!$useFinfo) {
      return $nullBytes > 0;
    }

    // Strict mode: let fileinfo decide on the basis of the detected MIME encoding.
    $detectedEncoding = (new \finfo(FILEINFO_MIME_ENCODING))->buffer($input);

    return $detectedEncoding === 'binary';
  }
2251
2252
  /**
2253
   * Check if the file is binary.
2254
   *
2255
   * @param string $file
2256
   *
2257
   * @return boolean
2258
   */
2259 3
  public static function is_binary_file($file): bool
  {
    // Only the first 512 bytes of the file are inspected.
    $block = '';
    $fp = false;

    try {
      // fopen() reports failure by returning false (plus a warning), not by throwing;
      // reading from that "handle" would raise a TypeError that the catch below does not cover.
      $fp = \fopen($file, 'rb');
      if ($fp !== false) {
        $block = (string)\fread($fp, 512);
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    } finally {
      // Always release the handle, even when reading failed.
      if (\is_resource($fp)) {
        \fclose($fp);
      }
    }

    // An unreadable or empty file yields an empty block, which is_binary() reports as not binary.
    return self::is_binary($block, true);
  }
2271
2272
  /**
2273
   * Checks if the given string is equal to any "Byte Order Mark".
2274
   *
2275
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2276
   *
2277
   * @param string $str <p>The input string.</p>
2278
   *
2279
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2280
   */
2281 1
  public static function is_bom($str): bool
  {
    // The known BOM sequences are the keys of self::$BOM; the input must be
    // strictly identical (type and value) to one of them.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2291
2292
  /**
2293
   * Check if the string contains any HTML tags (e.g. "<lall>").
2294
   *
2295
   * @param string $str <p>The input string.</p>
2296
   *
2297
   * @return boolean
2298
   */
2299 1
  public static function is_html(string $str): bool
  {
    // An empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, with optional attributes.
    $tagFound = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $tagFound === 1;
  }
2312
2313
  /**
2314
   * Try to check if "$str" is a JSON string.
2315
   *
2316
   * @param string $str <p>The input string.</p>
2317
   *
2318
   * @return bool
2319
   */
2320 1
  public static function is_json(string $str): bool
  {
    // An empty string is never JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only objects and arrays count; a decoded scalar or null is rejected.
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    // The decoder must not have reported an error either.
    return \json_last_error() === JSON_ERROR_NONE;
  }
2336
2337
  /**
2338
   * Check if the string is UTF-16.
2339
   *
2340
   * @param string $str <p>The input string.</p>
2341
   *
2342
   * @return int|false <p>
2343
   *                   <strong>false</strong> if it's not UTF-16,<br>
2344
   *                   <strong>1</strong> for UTF-16LE,<br>
2345
   *                   <strong>2</strong> for UTF-16BE.
2346
   *                   </p>
2347
   */
2348 10 View Code Duplication
  public static function is_utf16(string $str)
  {
    // Only input that is_binary() flags as binary is examined further.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input; computed lazily and shared by both probes.
    $sourceChars = [];

    // Scores one byte order: the input must survive a round trip through UTF-8,
    // then every character key of the round-tripped text that is found (strictly)
    // in the input statistics adds one point.
    $scoreFor = function (string $byteOrder) use ($str, &$sourceChars): int {
      $points = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        return $points;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        return $points;
      }

      if (\count($sourceChars) === 0) {
        $sourceChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $sourceChars, true) === true) {
          $points++;
        }
      }

      return $points;
    };

    $littleEndian = $scoreFor('UTF-16LE');
    $bigEndian = $scoreFor('UTF-16BE');

    // A tie (including 0:0) means "not UTF-16"; otherwise the higher score wins.
    if ($littleEndian === $bigEndian) {
      return false;
    }

    return ($littleEndian > $bigEndian) ? 1 : 2;
  }
2403
2404
  /**
2405
   * Check if the string is UTF-32.
2406
   *
2407
   * @param string $str
2408
   *
2409
   * @return int|false <p>
2410
   *                   <strong>false</strong> if it's not UTF-32,<br>
2411
   *                   <strong>1</strong> for UTF-32LE,<br>
2412
   *                   <strong>2</strong> for UTF-32BE.
2413
   *                   </p>
2414
   */
2415 8 View Code Duplication
  public static function is_utf32(string $str)
  {
    // Only input that is_binary() flags as binary is examined further.
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input; computed lazily and shared by both probes.
    $sourceChars = [];

    // Scores one byte order: the input must survive a round trip through UTF-8,
    // then every character key of the round-tripped text that is found (strictly)
    // in the input statistics adds one point.
    $scoreFor = function (string $byteOrder) use ($str, &$sourceChars): int {
      $points = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        return $points;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        return $points;
      }

      if (\count($sourceChars) === 0) {
        $sourceChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $sourceChars, true) === true) {
          $points++;
        }
      }

      return $points;
    };

    $littleEndian = $scoreFor('UTF-32LE');
    $bigEndian = $scoreFor('UTF-32BE');

    // A tie (including 0:0) means "not UTF-32"; otherwise the higher score wins.
    if ($littleEndian === $bigEndian) {
      return false;
    }

    return ($littleEndian > $bigEndian) ? 1 : 2;
  }
2470
2471
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * The check is a byte-wise state machine: over-long (non-shortest) forms, 5 and 6 octet sequences,
   * surrogates, code points above U+10FFFF, stray continuation octets and sequences that are cut off
   * (also at the very end of the string) are all rejected.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked. For an array, every element must be valid.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p><strong>true</strong> for valid UTF-8 (an empty input counts as valid),
   *              <strong>false</strong> otherwise.</p>
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // INFO: there is intentionally no PCRE shortcut here; a "/u" based check can not work
    // on systems where PCRE lacks UTF-8 support, so the state machine below is always used.

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
2629
2630
  /**
   * Decodes a JSON string, after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, handed to the native function unchanged
   *                        (e.g. <b>JSON_BIGINT_AS_STRING</b>).
   *                        </p>
   *
   * @return mixed <p>
   *               Whatever the native <i>json_decode()</i> returns for the filtered input: the decoded value
   *               in the appropriate PHP type, or <b>NULL</b> if the <i>json</i> cannot be decoded or is
   *               nested deeper than the recursion limit.
   *               </p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    return \json_decode(self::filter($json), $assoc, $depth, $options);
  }
2674
2675
  /**
   * Returns the JSON representation of a value, after passing it through UTF8::filter().
   *
   * INFO: The native function returns <b>FALSE</b> on failure, so this method has no "string" return
   *       type declaration; with strict types a failed encoding would otherwise end in a TypeError
   *       instead of the documented <b>FALSE</b>.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode options (<b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>, ...), handed to the native function unchanged.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    return \json_encode(self::filter($value), $options, $depth);
  }
2719
2720
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character; substr() may report "false" if there is nothing left
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
2744
2745
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The result of UTF8::lcfirst().</p>
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerCasedWord = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowerCasedWord;
  }
2760
2761
  /**
   * Lowercase the first character of all words in the string.
   *
   * The string is split via UTF8::str_to_words(), every part that is not listed in $exceptions is passed
   * through UTF8::lcfirst() and the parts are glued together again.
   *
   * INFO: Only really empty parts are skipped; the string "0" is handled like any other word.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // do not use "!$str" here, "0" is a valid input
    if ($str === '') {
      return '';
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty parts, but keep the word "0"
      if (!$word && $word !== '0') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
2810
2811
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * INFO: Without $chars (INF, null, '' ...) all leading separator (\pZ) and "other" (\pC) characters are
   *       removed. The char list "0" is a valid list and strips leading zeros.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>
   *                The string with unwanted characters stripped from the left. If the regular expression
   *                fails (e.g. because of malformed UTF-8 in the input) the string is returned unchanged.
   *                </p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = '/^[\pZ\pC]+/u';
    } else {
      $pattern = '/^' . self::rxClass($chars) . '+/u';
    }

    // "preg_replace()" returns null on error, which would violate the string return type
    return \preg_replace($pattern, '', $str) ?? $str;
  }
2832
2833
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated first).</p>
   *
   * @return string <p>
   *                The character with the highest code point, or an empty string if the input
   *                does not contain any code point.
   *                </p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native "max()" must not be called with an empty array
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\max($codePoints));
  }
2848
2849
  /**
   * Returns the largest entry of UTF8::chr_size_list() for the given string,
   * i.e. the maximum number of bytes taken by any of its characters.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2866
2867
  /**
   * Checks whether mbstring is available on the server.
   *
   * INFO: If the extension is loaded, the internal mbstring encoding is set to "UTF-8" as a side effect.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2882
2883 1
  /**
   * Checks whether the "mbstring.func_overload" ini setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p>
   *              <strong>false</strong> if the constant MB_OVERLOAD_STRING is not defined or the bit is not set,
   *              <strong>true</strong> otherwise.
   *              </p>
   */
  private static function mbstring_overloaded(): bool
  {
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2889
2890
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated first).</p>
   *
   * @return string <p>
   *                The character with the lowest code point, or an empty string if the input
   *                does not contain any code point.
   *                </p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native "min()" must not be called with an empty array
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(\min($codePoints));
  }
2905
2906
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Returned by UTF8::normalize_encoding() for an empty encoding name.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2922
2923
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty name => $fallback, "UTF-8" / "UTF8" => "UTF-8", per-request cache, exact match in
   * the list of known encodings, and finally the alias table below. For the alias lookup the name is
   * upper-cased and everything except letters and digits (also whitespace) is ignored, so that
   * e.g. "iso 8859-1", "ISO_8859_1" and "latin1" all end up as "ISO-8859-1". A name without alias is
   * returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // the alias table never changes, so build it only once per request
    static $EQUIVALENCES = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!$encoding) {
      return $fallback;
    }

    // fast path for the most common case
    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);

    // alias keys consist of letters and digits only, so drop separators *and* whitespace
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    if (!empty($EQUIVALENCES[$encodingUpperHelper])) {
      $encoding = $EQUIVALENCES[$encodingUpperHelper];
    }

    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3040
3041
  /**
3042
   * Normalize some MS Word special characters.
3043
   *
3044
   * @param string $str <p>The string to be normalized.</p>
3045
   *
3046
   * @return string
3047
   */
3048 16 View Code Duplication
  public static function normalize_msword(string $str): string
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
3049
  {
3050 16
    if (!isset($str[0])) {
3051 1
      return '';
3052
    }
3053
3054 16
    static $UTF8_MSWORD_KEYS_CACHE = null;
3055 16
    static $UTF8_MSWORD_VALUES_CACHE = null;
3056
3057 16
    if ($UTF8_MSWORD_KEYS_CACHE === null) {
3058
3059 1
      if (self::$UTF8_MSWORD === null) {
3060 1
        self::$UTF8_MSWORD = self::getData('utf8_msword');
3061
      }
3062
3063 1
      $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
3064 1
      $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
3065
    }
3066
3067 16
    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
3068
  }
3069
3070
  /**
   * Normalize the whitespace.
   *
   * All entries of the whitespace table are replaced by a plain space and, unless
   * $keepBidiUnicodeControls is set, all entries of the bidi-control table are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one search list per value of $keepNonBreakingSpace
    static $whitespaceLists = [];
    static $bidiControls = null;

    if (!isset($str[0])) {
      return '';
    }

    $listKey = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceLists[$listKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceLists[$listKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($bidiControls, '', $str);
    }

    return \str_replace($whitespaceLists[$listKey], ' ', $str);
  }
3112
3113
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per "character + encoding". If available, "IntlChar::ord()" is asked first;
   * otherwise (or if it reports nothing usable) the code point is decoded from the first (up to four) bytes.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 for an empty input.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    // the cache key is built from the input as it was passed in
    $cacheKey = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert only if the normalized name is still something else than UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey .= $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // fallback: decode the lead byte and its continuation bytes by hand (1-based array from "unpack()")
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4 byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3 byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2 byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all)
      $code = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $code;
  }
3178
3179
  /**
   * Parses the string into an array (into the second parameter) via "mb_parse_str()".
   *
   * WARNING: Unlike the native "parse_str()" this method never (re-)places variables in the current scope,
   *          the result is only written into the reference parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or the $result is empty.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3204
3205
  /**
   * Checks if the "/u" modifier, which enables Unicode support in PCRE, is available.
   *
   * The check runs an empty pattern with the "/u" modifier against an empty string (errors are silenced)
   * and casts the result to bool.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool)$probe;
  }
3215
3216
  /**
   * Build an array of UTF-8 characters covering a range of code points.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>One character per code point, or an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // Resolves one boundary to a code point: decimal digits first, then hex digits, else the character itself.
    $toCodePoint = static function ($boundary) {
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    $codePoints = \range($start, $end);

    return \array_map([self::class, 'chr'], $codePoints);
  }
3262
3263
  /**
   * Decode url-encoded strings and html entities (repeatedly, if requested) and repair simple broken UTF-8.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Keep decoding until the string no longer changes.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten to hexadecimal html entities, which the loop below decodes.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

      // In multi-decode mode another pass runs as long as the last pass changed something.
    } while ($multi_decode === true && $before !== $str);

    return $str;
  }
3311
3312
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3327
3328
  /**
   * Strip every byte-order-mark listed in self::$BOM from the beginning of the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bom => $bomLength) {
      // Only a marker at byte offset 0 is a BOM; anything else is left alone.
      if (self::strpos($str, $bom, 0, '8BIT') !== 0) {
        continue;
      }

      // Cut by the byte length stored with the marker; a failed cut leaves an empty string.
      $remainder = self::substr($str, $bomLength, null, '8BIT');
      $str = $remainder === false ? '' : (string)$remainder;
    }

    return $str;
  }
3353
3354
  /**
   * Collapse consecutive repetitions of a string inside another string into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or list of strings) whose repetitions should be collapsed.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // An empty needle cannot repeat, nothing to collapse.
        if ($item === '') {
          continue;
        }

        // The needle is literal text: escape "\" and "$" so that e.g. "$0" or "\2"
        // are not interpreted as back-references by preg_replace().
        $literalReplacement = \strtr($item, ['\\' => '\\\\', '$' => '\\$']);

        $collapsed = \preg_replace('/(?:' . \preg_quote($item, '/') . ')+/', $literalReplacement, $str);

        // preg_replace() returns null on a PCRE error; keep the current string instead of wiping it.
        if ($collapsed !== null) {
          $str = $collapsed;
        }
      }
    }

    return $str;
  }
3377
3378
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" - "%1F") of these characters.</p>
   * @param string $replacement <p>The text that is inserted for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding hex digits are case-insensitive, so "%0B" must match as well as "%0b"
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass removes nothing: removing one sequence can join its neighbours into a new one.
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3411
3412
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement text (an empty string removes the characters).</p>
   * @param bool   $processInvalidUtf8 <p>Also replace byte sequences that are not valid UTF-8.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // mb_substitute_character() only accepts a code point or the keywords "none" / "long" / "entity",
      // never an arbitrary string. So invalid bytes are either dropped ("none") or turned into U+FFFD,
      // which the str_replace() below then swaps for the requested replacement.
      $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // The substitute character is a global mbstring setting: save it and restore it afterwards.
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD.
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
3461
3462
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped; by default separator (\pZ) and "other" (\pC) characters.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy in PHP but still a legitimate character to strip, so it is excluded from the "not set" check.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      $pattern = '/' . self::rxClass((string)$chars) . '+$/u';
    }

    $trimmed = \preg_replace($pattern, '', $str);

    // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 with the "u" modifier);
    // hand back the untouched input instead of violating the string return type.
    return $trimmed ?? $str;
  }
3483
3484
  /**
   * Build a regular-expression fragment that matches any of the characters in "$s".
   *
   * The result is either a bracket expression ("[...]") or, if "$s" contains elements that are
   * longer than one character, a non-capturing alternation of the bracket expression and these elements.
   * Results are memoized per "$s" + "$class" combination.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $CACHE = [];

    $cacheKey = $s . $class;
    if (isset($CACHE[$cacheKey])) {
      return $CACHE[$cacheKey];
    }

    // Slot 0 collects the bracket expression, further slots are alternatives.
    $parts = [$class];

    foreach (self::str_split($s) as $element) {
      if ($element === '-') {
        // a hyphen goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($element[2])) {
        // at most two bytes long: quote it for use inside the pattern
        $parts[0] .= \preg_quote($element, '/');
        continue;
      }

      if (self::strlen($element) === 1) {
        // a single character of three or more bytes is appended as it is
        $parts[0] .= $element;
        continue;
      }

      // everything else becomes its own alternative
      $parts[] = $element;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $regex = \count($parts) === 1
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $CACHE[$cacheKey] = $regex;

    return $regex;
  }
3532
3533
  /**
   * WARNING: Prints (echo) the detected native UTF-8 support (libs) as html, e.g. for debugging.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // One "key - value" line per support entry, wrapped in a <pre> block.
    $rows = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $rows .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $rows . '</pre>';
  }
3548
3549
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, the unchanged ASCII input, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // Anything but the literal "UTF-8" goes through the normalizer (with "UTF-8" as its second argument).
    $targetEncoding = $encoding === 'UTF-8'
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
3578
3579
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // init
    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // PCRE with "u": every "." match is one character ("s" lets the dot match newlines too)
      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the UTF-8 lead-byte bit patterns

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // 0xxxxxxx: single byte
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;

          continue;
        }

        // 110xxxxx: lead byte of a two-byte sequence
        if (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }

          continue;
        }

        // 1110xxxx: lead byte of a three-byte sequence
        if (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

          continue;
        }

        // 11110xxx: lead byte of a four-byte sequence
        if (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }

        // bytes that fit none of the patterns are skipped
      }
    }

    if ($length > 1) {
      // glue every "$length" characters together
      return \array_map(
          function ($group) {
            return \implode('', $group);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3696
3697
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (for binary input), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally a round-trip via "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str, true) === true) {

      // each detector is asked once; 1 is mapped to little endian, 2 to big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the loaded data is not guaranteed to be a list, so only iterate a real array
    if (\is_array(self::$ENCODINGS) === true) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // iconv() returns false on failure; the strict comparison covers that case and
        // accepts the encoding only if the round trip leaves the bytes unchanged.
        if ($roundTrip === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
3801
3802
  /**
   * Tells whether "$haystack" ends with "$needle" (byte-wise, case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if one of the strings is empty.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // compare the last strlen($needle) bytes of the haystack with the needle
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3822
3823
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if one of the strings is empty.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Cut the tail by characters, not by bytes: the upper and lower case form of a character
    // can have different byte lengths (e.g. "ẞ" is 3 bytes, "ß" is 2 bytes), so a byte-based
    // cut could start in the middle of a multi-byte character.
    $tail = (string)self::substr($haystack, -(int)self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3843
3844
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The literal replacement text (or an array of them);
   *                       "$" and "\" have no special meaning.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every search term into a case-insensitive unicode pattern.
    $patterns = [];
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      // An empty search term must never match: "^" followed by a look-behind for a character is impossible.
      $patterns[$key] = $term === ''
          ? '/^(?<=.)$/'
          : '/' . \preg_quote($term, '/') . '/ui';
    }

    // The replacement is literal text as in str_ireplace(): escape "\" and "$" so that
    // preg_replace() does not read e.g. "$1" or "\1" as a back-reference.
    $toLiteral = static function ($text): string {
      return \strtr((string)$text, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($toLiteral, $replace) : $toLiteral($replace);

    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
3887
3888
  /**
   * Tells whether "$haystack" begins with "$needle", ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if one of the strings is empty.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the needle has to be found at position 0
    return self::stripos($haystack, $needle) === 0;
  }
3908
3909
  /**
   * Shorten a string to "$length" characters, cut back to the last space inside this limit
   * (if there is one), and append "$strAddOn".
   *
   * Strings that are not longer than "$length" are returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters.</p>
   * @param string $strAddOn <p>The text that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The limit ends exactly on a space: drop the space and we are done.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = (string)self::substr($str, 0, $length);

    // Keep everything in front of the last space (a space is a single byte, so byte functions are fine here).
    $lastSpace = \strrpos($cut, ' ');
    $wholeWords = $lastSpace === false ? '' : \substr($cut, 0, $lastSpace);

    if ($wholeWords !== '') {
      return $wholeWords . $strAddOn;
    }

    // Nothing left in front of a space: fall back to a hard cut.
    return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
  }
3945
3946
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                The padded string; the input is returned unchanged if "$pad_length" is not positive,
   *                shorter than the input, or if "$pad_string" is empty.
   *                </p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = (int)self::strlen($str);
    $ps_length = (int)self::strlen($pad_string);

    // Nothing to pad; an empty pad string would also lead to a division by zero below.
    if ($pad_length <= 0 || $pad_length < $str_length || $ps_length === 0) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    // Builds a padding of exactly "$chars" characters by repeating the pad string and trimming the overflow.
    $buildPadding = static function (int $chars) use ($pad_string, $ps_length): string {
      if ($chars < 1) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($chars / $ps_length));

      return (string)self::substr($repeated, 0, $chars);
    };

    switch ($pad_type) {
      case STR_PAD_LEFT:
        return $buildPadding($diff) . $str;

      case STR_PAD_BOTH:
        // Integer halves: with an odd difference the extra character goes to the right side.
        $left = (int)\floor($diff / 2);

        return $buildPadding($left) . $str . $buildPadding($diff - $left);

      case STR_PAD_RIGHT:
      default:
        return $str . $buildPadding($diff);
    }
  }
4000
4001
  /**
   * Repeat a string (the input goes through "UTF8::filter()" first).
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4025
4026
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // plain pass-through to the native function, including the by-reference counter
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4059
4060
  /**
   * Replace only the first occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The text to look for.</p>
   * @param string $replace <p>The text to insert instead.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject itself if "$search" was not found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    // swap exactly the found occurrence: start at its position, span the length of the search term
    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
4079
4080
  /**
   * Put the characters of a string into random order.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    // shuffle whole characters (as returned by "UTF8::split()"), not single bytes
    $characters = self::split($str);

    \shuffle($characters);

    return \implode('', $characters);
  }
4095
4096
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops repeated values (array keys are unique)
      $flipped = \array_flip($codePoints);
      $codePoints = \array_flip($flipped);
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
4121
4122
  /**
   * Split a string into an array of chunks.
   *
   * The string is first broken into the units matched by GRAPHEME_CLUSTER_RX;
   * for "$len" > 1 these units are then joined into chunks of "$len" units each
   * (the last chunk may be shorter).
   *
   * @param string|string[] $str <p>The input string, or an array of strings (every entry is split on its own).</p>
   * @param int             $len <p>Number of units per chunk.</p>
   *
   * @return array <p>
   *               The chunks; an empty array for an empty string or a "$len" below 1.<br>
   *               For array input: an array (same keys) holding the result for every entry.
   *               </p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if (!isset($str[0])) {
      return [];
    }

    // A chunk size below 1 cannot produce chunks. Delegating to \str_split()
    // here would yield "false" (or throw), which violates the array return type.
    if ($len < 1) {
      return [];
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $units = $matches[0] ?? [];

    if ($len === 1) {
      return $units;
    }

    $chunks = [];
    foreach (\array_chunk($units, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
4170
4171
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if "$haystack" begins with "$needle", <strong>false</strong> otherwise.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    return isset($haystack[0], $needle[0]) && \strpos($haystack, $needle) === 0;
  }
4191
4192
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big hexadecimal number and written
   * out in base 2 without leading zeros ("0" for an empty / all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of "0" and "1" characters.</p>
   */
  public static function str_to_binary(string $str): string
  {
    static $nibbleToBits = [
        '0' => '0000', '1' => '0001', '2' => '0010', '3' => '0011',
        '4' => '0100', '5' => '0101', '6' => '0110', '7' => '0111',
        '8' => '1000', '9' => '1001', 'a' => '1010', 'b' => '1011',
        'c' => '1100', 'd' => '1101', 'e' => '1110', 'f' => '1111',
    ];

    $value = \unpack('H*', $str);

    // Convert digit by digit instead of via \base_convert(): that function
    // works on a float internally and silently loses precision for inputs
    // longer than a few bytes.
    $bits = \ltrim(\strtr($value[1], $nibbleToBits), '0');

    return $bits === '' ? '0' : $bits;
  }
4205
4206
  /**
   * Convert a string into an array of words.
   *
   * The string is split on "word" matches (letters plus "$charList", optionally
   * joined by dashes or apostrophes) while keeping the matches, so the raw result
   * contains the words as well as the text between them.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Drop entries that are empty after trimming.</p>
   * @param null|int $removeShortValues <p>If set, drop entries whose length is less than or equal to this value.</p>
   *
   * @return array <p>The pieces of the string; [''] (or [] with "$removeEmptyValues") for an empty string.</p>
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $wordChars = self::rxClass($charList, '\pL');
    $pattern = "/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u";
    $pieces = \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $filterShort = $removeShortValues !== null;

    // no filter requested: the raw split result is the answer
    if (!$filterShort && $removeEmptyValues === false) {
      return $pieces;
    }

    $kept = [];
    foreach ($pieces as $piece) {
      $tooShort = $filterShort && self::strlen($piece) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $blank = $removeEmptyValues === true && \trim($piece) === '';
      if ($blank) {
        continue;
      }

      $kept[] = $piece;
    }

    return $kept;
  }
4261
4262
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string <p>The result of UTF8::to_ascii().</p>
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4277
4278
  /**
   * Counts number of words in the UTF-8 string.
   *
   * Works on the result of UTF8::str_to_words(): the words are read from the odd
   * indices of that array, the even indices hold the text in between.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words, or the list of words (depending on "$format").</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $pieces = self::str_to_words($str, $charlist);
    $pieceCount = \count($pieces);

    // any format other than 1 or 2 only asks for the amount of words
    if ($format !== 1 && $format !== 2) {
      return ($pieceCount - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      for ($i = 1; $i < $pieceCount; $i += 2) {
        $words[] = $pieces[$i];
      }

      return $words;
    }

    // format 2: key every word by its character offset in the input
    $position = self::strlen($pieces[0]);
    for ($i = 1; $i < $pieceCount; $i += 2) {
      $words[$position] = $pieces[$i];
      $position += self::strlen($pieces[$i]) + self::strlen($pieces[$i + 1]);
    }

    return $words;
  }
4321
4322
  /**
   * Case-insensitive string comparison.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared with UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4340
4341
  /**
   * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false <p>The result of UTF8::strstr().</p>
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4358
4359
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings compare equal right away; otherwise both strings are
   * brought into NFD form and compared byte-wise.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // The normalizer reports failure with "false"; passing that on to \strcmp()
    // is a TypeError in strict mode, so compare the raw strings instead.
    if ($normalized1 === false || $normalized2 === false) {
      return \strcmp($str1, $str2);
    }

    return \strcmp($normalized1, $normalized2);
  }
4379
4380
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start of the part of "$str" that is examined.</p>
   * @param int    $length   <p>Length of the part of "$str" that is examined.</p>
   *
   * @return int|null <p>
   *                  The length of the segment before the first char from "$charList"
   *                  (the whole length if none occurs),<br>
   *                  or <strong>null</strong> for an empty "$charList" or an empty (sub-)string.
   *                  </p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // only cut the string if a window was actually requested
    if ($offset !== 0 || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    if (!isset($str[0])) {
      return null;
    }

    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $hit)) {
      return self::strlen($hit[1]);
    }

    return self::strlen($str);
  }
4414
4415
  /**
   * Alias of "UTF8::stristr()": all arguments are passed through unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false <p>The result of UTF8::stristr().</p>
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4432
4433
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>The concatenation of UTF8::chr() applied to every entry.</p>
   */
  public static function string(array $array): string
  {
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
4455
4456
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * Every known BOM (the keys of self::$BOM) is tried as a prefix.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4473
4474
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string; an empty string for empty input.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4506
4507
  /**
   * Remove every run of whitespace (regex class "[[:space:]]" in unicode mode)
   * from the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without whitespace.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    return isset($str[0]) ? (string)\preg_replace('/[[:space:]]+/u', '', $str) : '';
  }
4524
4525
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the reported position
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;

    if (!$useGrapheme) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
4573
4574
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive search).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or if haystack / needle is empty).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the cleaning removed everything.
    // Compare strictly: a loose check would also treat the needle "0" as empty.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: capture everything in front of the first match
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4649
4650
  /**
   * Get the string length, not the byte-length!
   *
   * The first available backend is used, in this order: mbstring, iconv,
   * intl (UTF-8 only), plain strlen() for ASCII input and finally a regex count.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte-oriented encodings: the length is the byte count
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $plainStrlenIsSafe = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;

        return $plainStrlenIsSafe ? \strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the characters matched in unicode mode
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4756
4757
  /**
   * Get string length in byte.
   *
   * If the mbstring function overloading is active, "\strlen()" can no longer
   * be used for counting bytes, so "\mb_strlen()" with the "8BIT" encoding is used instead.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The number of bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    // make sure the support flags are populated before reading them
    // (same guard as in the other methods that consult self::$SUPPORT)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, '8BIT');
    }

    return \strlen($str);
  }
4774
4775
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared with UTF8::strnatcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4791
4792
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: identical strings compare equal right away; otherwise both strings are
   *       passed through UTF8::strtonatfold() before the native comparison
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4810
4811
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared with UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4828
4829
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are cut to "$len" characters via UTF8::substr() and then
   *       compared with UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
4849
4850
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      String starting from the character found, or false if it is not found
   *                      (or if haystack / char_list is empty).
   *                      </p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    return \substr($haystack, \strpos($haystack, $hit[0]));
  }
4872
4873
  /**
   * Find position of first occurrence of string in a string.
   *
   * The first available backend is used, in this order: plain strpos() for the
   * "CP850" encoding, mbstring, intl (UTF-8 only), iconv, plain strpos() for
   * ASCII input and finally a byte search whose result is converted into a
   * character position.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the haystack: translate it into
    // the equivalent position from the start, so that the returned position
    // refers to the whole haystack and not only to the searched tail.
    if ($offset < 0) {
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $tail = \substr($haystack, $offset);
    } else {
      $tail = self::substr($haystack, $offset);
    }
    if ($tail === false) {
      $tail = '';
    }
    $tail = (string)$tail;

    $bytePos = \strpos($tail, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position inside the tail into a character position
    return $offset + self::strlen(\substr($tail, 0, $bytePos));
  }
5012
5013
  /**
   * Returns a portion of the haystack relative to the last occurrence of the needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return everything from the beginning of the haystack
   *                              up to the last occurrence of the needle.<br>
   *                              <b>false</b> (default): return everything from the last occurrence
   *                              of the needle to the end of the haystack.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from both strings first.</p>
   *
   * @return string|false <p>The requested portion of the haystack,<br>or <b>false</b> if the needle is not found.</p>
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // optional sanitizing of both inputs before they reach the "mb_"-function
    if ($cleanUtf8) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the real work is delegated to the "mb_"-function (native or polyfill)
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5052
5053
  /**
   * Reverses the order of the characters in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The characters of the input in reverse order, "" for an empty input.</p>
   */
  public static function strrev(string $str): string
  {
    // nothing to reverse
    if ($str === '') {
      return '';
    }

    // split into single characters, flip the list and glue it back together
    $chars = self::split($str);
    $reversed = \array_reverse($chars);

    return \implode('', $reversed);
  }
5068
5069
  /**
   * Returns a portion of the haystack relative to the last occurrence of the needle, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string to search in.</p>
   * @param string $needle         <p>The string to search for.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return everything from the beginning of the haystack
   *                               up to the last occurrence of the needle.<br>
   *                               <b>false</b> (default): return everything from the last occurrence
   *                               of the needle to the end of the haystack.
   *                               </p>
   * @param string $encoding       [optional] <p>Character encoding name, default is "UTF-8".</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from both strings first.</p>
   *
   * @return string|false <p>The requested portion of the haystack,<br>or <b>false</b> if the needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // optional sanitizing of both inputs before they reach the "mb_"-function
    if ($cleanUtf8) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the real work is delegated to the "mb_"-function
    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5107
5108
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * Tries "mb_strripos()" first, then "grapheme_strripos()" (UTF-8 only) and
   * finally falls back to UTF8::strrpos() on upper-cased copies of both strings.
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or if one of the strings is empty), it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // INFO: $needle is declared as "string", so a code-point (int) can never
    // arrive here and no "int to char" conversion is needed.

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only warn: the fallbacks below are still tried afterwards
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php
    //
    // NOTE(review): upper-casing may change the number of characters for some
    // inputs, which would shift the reported position - verify if this matters.

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5170
5171
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Tries "mb_strrpos()" first, then "grapheme_strrpos()" (UTF-8 only) and
   * finally falls back to a byte-wise search whose result is converted
   * back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. "null" is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or if one of the strings is empty), it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // "null" means "no offset": normalize it once, so that the native
    // functions below (which expect an int offset) never receive "null"
    $offset = $offset ?? 0;

    // a non-negative int is interpreted as code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: $encoding is declared as "string", so the old "bool"-fallback check
    // for this parameter could never match and was removed
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only warn: the fallbacks below are still tried afterwards
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // cut the haystack according to the offset, so that the byte-wise
    // search only sees the relevant part
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5261
5262
  /**
   * Returns the length (in characters) of the initial segment of a string
   * that consists only of characters from the given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The allowed characters.</p>
   * @param int    $offset [optional] <p>Passed to UTF8::substr() to cut the input before matching.</p>
   * @param int    $length [optional] <p>Passed to UTF8::substr() to cut the input before matching.</p>
   *
   * @return int <p>0 if the (cut) input or the mask is empty, or if the first character is not in the mask.</p>
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // restrict the input to the requested window first
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the longest run of mask-characters at the start of the string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5289
5290
  /**
   * Returns the part of the haystack from the first occurrence of the needle to the end of the haystack.
   *
   * Tries "mb_strstr()" first, then "grapheme_strstr()" (UTF-8 only) and
   * finally falls back to a regular expression.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>A sub-string,<br>or <strong>false</strong> if the needle is not found
   *                      or if one of the strings is empty.</p>
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // nothing can be found in / with an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (true === $cleanUtf8) {
      // sanitize both inputs before they reach the native functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only warn: the fallbacks below are still tried afterwards
    if ('UTF-8' !== $encoding && false === self::$SUPPORT['mbstring']) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (true === self::$SUPPORT['mbstring']) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ('UTF-8' === $encoding && true === self::$SUPPORT['intl']) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: capture everything before the first occurrence of the needle
    $found = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5357
5358
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first run through the replacement table UTF8::$COMMON_CASE_FOLD,
   * optionally through the "caseFolding_full" data, and is lower-cased at the end.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, also replace the full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // per-request caches for the replacement tables
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the full table is only loaded when it is actually requested
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5404
5405
  /**
   * Make a string lowercase.
   *
   * Without "$lang" the work is done by "mb_strtolower()"; with "$lang" and
   * available intl-support a transliterator ("<lang>-Lower") is used instead.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to lowercase.</p>
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8) {
      // get rid of invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific handling requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific handling needs intl, otherwise warn and use the generic conversion
    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    $knownIds = self::$SUPPORT['intl__transliterator_list_ids'];
    if (!\in_array($langCode, $knownIds, true)) {
      \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the generic transliterator
      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
5457
5458
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are
   * removed afterwards, e.g. accents are stripped from their base characters.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The folded string, or "" if the string could not be normalized.</p>
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if (!\is_string($decomposed)) {
      // normalization failed (e.g. invalid input): never hand a non-string
      // value over to "preg_replace()"
      return '';
    }

    // "preg_replace()" returns null on error, the cast keeps the declared return type
    return (string)\preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5470
5471
  /**
   * Make a string uppercase.
   *
   * Without "$lang" the work is done by "mb_strtoupper()"; with "$lang" and
   * available intl-support a transliterator ("<lang>-Upper") is used instead.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // get rid of invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // no intl-support: warn and continue with the generic conversion below
      \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5522
5523
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to. If omitted, $from is either
   *                              removed from $str (string) or used as replacement map (array).</p>
   *
   * @return string <p>
   *                A copy of str, in which all occurrences of each character in from are translated
   *                to the corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // identical source and target: nothing would change
    if ($from === $to) {
      return $str;
    }

    // INF is the marker for "no $to given"
    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);
      $fromCount = \count($fromChars);
      $toCount = \count($toChars);

      // surplus characters of the longer list are ignored
      if ($fromCount > $toCount) {
        $fromChars = \array_slice($fromChars, 0, $toCount);
      } elseif ($toCount > $fromCount) {
        $toChars = \array_slice($toChars, 0, $fromCount);
      }

      // build the "char => replacement" map
      $from = \array_combine($fromChars, $toChars);
    }

    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
5568
5569
  /**
   * Return the width of a string, as reported by "mb_strwidth()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // only non-default encoding names need to be normalized
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // the real work is delegated to the "mb_"-function (native or polyfill)
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
5593
5594
  /**
5595
   * Get part of a string.
5596
   *
5597
   * @link http://php.net/manual/en/function.mb-substr.php
5598
   *
5599
   * @param string $str       <p>The string being checked.</p>
5600
   * @param int    $offset    <p>The first position used in str.</p>
5601
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
5602
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5603
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5604
   *
5605
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
5606
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
5607
   *                      characters long, <b>FALSE</b> will be returned.</p>
5608
   */
5609 72
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Multi-byte aware substr(): tries mbstring, then intl, then iconv,
    // and finally falls back to a pure-PHP split of the string.

    // Nothing can be cut out of an empty string.
    if (!isset($str[0])) {
      return '';
    }

    // An explicit length of zero always yields an empty string.
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // The character count is only needed to validate a non-zero offset
    // or to replace a missing length.
    $str_length = 0;
    if ($offset !== 0 || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // Empty string: the offset points exactly behind the last character.
    // ($length can no longer be 0 here, so a strict null check is sufficient.)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible: the offset lies behind the end of the string.
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    // From here on $length is always set.
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut byte-wise with the native function, unless mbstring overloads it.
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // Pure ASCII can be cut byte-wise.
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5713
5714
  /**
5715
   * Binary safe comparison of two strings from an offset, up to length characters.
5716
   *
5717
   * @param string   $str1               <p>The main string being compared.</p>
5718
   * @param string   $str2               <p>The secondary string being compared.</p>
5719
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
5720
   *                                     counting from the end of the string.</p>
5721
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
5722
   *                                     the length of the str compared to the length of main_str less the offset.</p>
5723
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5724
   *                                     insensitive.</p>
5725
   *
5726
   * @return int <p>
5727
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
5728
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
5729
   *             <strong>0</strong> if they are equal.
5730
   *             </p>
5731
   */
5732 1
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    // Slicing is only needed when the caller asked for a window;
    // otherwise both strings are compared exactly as given.
    $needsSlicing = ($offset !== 0 || $length !== null);

    if ($needsSlicing) {
      // Cut the requested window out of the main string (a failed cut counts as '').
      $window = self::substr($str1, $offset, $length);
      $str1 = ($window === false) ? '' : (string)$window;

      // Shorten the second string to the character count of that window.
      $head = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($head === false) ? '' : (string)$head;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5758
5759
  /**
5760
   * Count the number of substring occurrences.
5761
   *
5762
   * @link  http://php.net/manual/en/function.substr-count.php
5763
   *
5764
   * @param string $haystack   <p>The string to search in.</p>
5765
   * @param string $needle     <p>The substring to search for.</p>
5766
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
5767
   * @param int    $length     [optional] <p>
5768
   *                           The maximum length after the specified offset to search for the
5769
   *                           substring. It outputs a warning if the offset plus the length is
5770
   *                           greater than the haystack length.
5771
   *                           </p>
5772
   * @param string $encoding   <p>Set the charset.</p>
5773
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
5774
   *
5775
   * @return int|false <p>The number of occurrences as an integer, or false if the haystack or the needle is empty.</p>
5776
   */
5777 1
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Both strings must be non-empty, otherwise there is nothing to count.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Narrow the haystack down to the requested window first.
    if ($offset || $length !== null) {

      if ($length === null) {
        // Measure the haystack in the same encoding that is used for the
        // "substr()" call below, so that offset and length refer to the same units.
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // clean both strings before they are handed to the counting functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the literal (quoted) needle via a UTF-8 aware regex
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5841
5842
  /**
5843
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
5844
   *
5845
   * @param string $haystack <p>The string to search in.</p>
5846
   * @param string $needle   <p>The substring to search for.</p>
5847
   *
5848
   * @return string <p>Return the sub-string.</p>
5849
   */
5850 1 View Code Duplication
  public static function substr_ileft(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if (!isset($haystack[0])) {
      return '';
    }

    // An empty needle, or a haystack that does not start with the needle
    // (compared case-insensitively), is handed back untouched.
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle is long.
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
5870
5871
  /**
5872
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
5873
   *
5874
   * @param string $haystack <p>The string to search in.</p>
5875
   * @param string $needle   <p>The substring to search for.</p>
5876
   *
5877
   * @return string <p>Return the sub-string.</p>
5878
   */
5879 1 View Code Duplication
  public static function substr_iright(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if (!isset($haystack[0])) {
      return '';
    }

    // An empty needle, or a haystack that does not end with the needle
    // (compared case-insensitively), is handed back untouched.
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the trailing characters covered by the needle.
    $keep = self::strlen($haystack) - self::strlen($needle);
    $rest = self::substr($haystack, 0, $keep);

    return ($rest === false) ? '' : (string)$rest;
  }
5899
5900
  /**
5901
   * Removes a prefix ($needle) from the start of the string ($haystack).
5902
   *
5903
   * @param string $haystack <p>The string to search in.</p>
5904
   * @param string $needle   <p>The substring to search for.</p>
5905
   *
5906
   * @return string <p>Return the sub-string.</p>
5907
   */
5908 1 View Code Duplication
  public static function substr_left(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if (!isset($haystack[0])) {
      return '';
    }

    // An empty needle, or a haystack that does not start with the needle,
    // is handed back untouched.
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle is long.
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
5928
5929
  /**
5930
   * Replace text within a portion of a string.
5931
   *
5932
   * source: https://gist.github.com/stemar/8287074
5933
   *
5934
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
5935
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
5936
   * @param int|int[]       $offset           <p>
5937
   *                                          If start is positive, the replacing will begin at the start'th offset
5938
   *                                          into string.
5939
   *                                          <br><br>
5940
   *                                          If start is negative, the replacing will begin at the start'th character
5941
   *                                          from the end of string.
5942
   *                                          </p>
5943
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
5944
   *                                          portion of string which is to be replaced. If it is negative, it
5945
   *                                          represents the number of characters from the end of string at which to
5946
   *                                          stop replacing. If it is not given, then it will default to strlen(
5947
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
5948
   *                                          length is zero then this function will have the effect of inserting
5949
   *                                          replacement into string at the given start offset.</p>
5950
   *
5951
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
5952
   */
5953 7
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // Array input: normalise every argument to one entry per input string
    // and process the strings one by one.
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: cut an array down, or repeat a single value
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: entries that are not integers are replaced by 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($value) {
              return (int)$value === $value ? $value : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: a missing length becomes 0 for every string, null entries
      // become 0 and other non-integer entries become the number of strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($value) use ($count) {
              if ($value === null) {
                return 0;
              }

              return (int)$value === $value ? $value : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, once per input string
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // Single string: only the first entry of a replacement array is used.
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    // Replacing inside an empty string leaves just the replacement.
    if (!isset($str[0])) {
      return $replacement;
    }

    // Pure ASCII input is handled by the native function.
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // Split both strings into characters and splice on character level.
    \preg_match_all('/./us', $str, $strMatches);
    \preg_match_all('/./us', $replacement, $replacementMatches);

    if ($length === null) {
      $length = self::strlen($str);
    }

    $chars = $strMatches[0];
    \array_splice($chars, $offset, $length, $replacementMatches[0]);

    return \implode('', $chars);
  }
6030
6031
  /**
6032
   * Removes a suffix ($needle) from the end of the string ($haystack).
6033
   *
6034
   * @param string $haystack <p>The string to search in.</p>
6035
   * @param string $needle   <p>The substring to search for.</p>
6036
   *
6037
   * @return string <p>Return the sub-string.</p>
6038
   */
6039 1 View Code Duplication
  public static function substr_right(string $haystack, string $needle): string
  {
    // An empty haystack has nothing to strip.
    if (!isset($haystack[0])) {
      return '';
    }

    // An empty needle, or a haystack that does not end with the needle,
    // is handed back untouched.
    if (!isset($needle[0]) || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the trailing characters covered by the needle.
    $keep = self::strlen($haystack) - self::strlen($needle);
    $rest = self::substr($haystack, 0, $keep);

    return ($rest === false) ? '' : (string)$rest;
  }
6059
6060
  /**
6061
   * Returns a case swapped version of the string.
6062
   *
6063
   * @param string $str       <p>The input string.</p>
6064
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
6065
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6066
   *
6067
   * @return string <p>Each character's case swapped.</p>
6068
   */
6069 1
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars up front; the regex below runs in UTF-8 mode ("u" modifier)
      $str = self::clean($str);
    }

    // Every non-whitespace character is upper-cased; if that did not change it,
    // it already was upper-case (or has no case) and is lower-cased instead.
    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        static function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns null on a PCRE error (e.g. malformed UTF-8 input);
    // hand back the input unchanged instead of violating the declared "string" return type.
    if ($strSwappedCase === null) {
      return $str;
    }

    return $strSwappedCase;
  }
6101
6102
  /**
6103
   * alias for "UTF8::to_ascii()"
6104
   *
6105
   * @see        UTF8::to_ascii()
6106
   *
6107
   * @param string $str
6108
   * @param string $subst_chr
6109
   * @param bool   $strict
6110
   *
6111
   * @return string
6112
   *
6113
   * @deprecated <p>use "UTF8::to_ascii()"</p>
6114
   */
6115 7
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    // Deprecated alias: all arguments are forwarded unchanged to "to_ascii()".
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6119
6120
  /**
6121
   * alias for "UTF8::to_iso8859()"
6122
   *
6123
   * @see        UTF8::to_iso8859()
6124
   *
6125
   * @param string|string[] $str
6126
   *
6127
   * @return string|string[]
6128
   *
6129
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
6130
   */
6131 1
  public static function toIso8859($str)
  {
    // Deprecated alias: the argument is forwarded unchanged to "to_iso8859()".
    $converted = self::to_iso8859($str);

    return $converted;
  }
6135
6136
  /**
6137
   * alias for "UTF8::to_latin1()"
6138
   *
6139
   * @see        UTF8::to_latin1()
6140
   *
6141
   * @param string|string[] $str
6142
   *
6143
   * @return string|string[]
6144
   *
6145
   * @deprecated <p>use "UTF8::to_latin1()"</p>
6146
   */
6147 1
  public static function toLatin1($str)
  {
    // Deprecated alias: the argument is forwarded unchanged to "to_latin1()".
    $converted = self::to_latin1($str);

    return $converted;
  }
6151
6152
  /**
6153
   * alias for "UTF8::to_utf8()"
6154
   *
6155
   * @see        UTF8::to_utf8()
6156
   *
6157
   * @param string|string[] $str
6158
   *
6159
   * @return string|string[]
6160
   *
6161
   * @deprecated <p>use "UTF8::to_utf8()"</p>
6162
   */
6163 1
  public static function toUTF8($str)
  {
    // Deprecated alias: the argument is forwarded unchanged to "to_utf8()".
    $converted = self::to_utf8($str);

    return $converted;
  }
6167
6168
  /**
6169
   * Convert a string into ASCII.
6170
   *
6171
   * @param string $str     <p>The input string.</p>
6172
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
6173
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6174
   *                        performance</p>
6175
   *
6176
   * @return string
6177
   */
6178 21
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Transliteration tables that were already loaded, keyed by "bank"
    // (the code point shifted right by 8 bits); kept across calls.
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // HACK for issue from "transliterator_transliterate()"
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        // "transliterator_transliterate()" returns false on failure; in that case
        // keep the current string and let the table based fallback below handle it.
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if (\is_string($transliterated) === true) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // Reset the code point for every character, so that a value computed
      // for a previous character can never be re-used by accident.
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // lead bytes 254 / 255 are always replaced by the "unknown" character
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be computed for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the lookup table for this bank on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      // characters without a table entry are replaced by the "unknown" character
      $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return \implode('', $chars);
  }
6333
6334
  /**
6335
   * Convert a string into "ISO-8859"-encoding (Latin-1).
6336
   *
6337
   * @param string|string[] $str
6338
   *
6339
   * @return string|string[]
6340
   */
6341 3
  public static function to_iso8859($str)
  {
    // Arrays are converted entry by entry (recursively), keeping their keys.
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $input = (string)$str;

    // An empty input stays empty, everything else goes through "utf8_decode()".
    return isset($input[0]) ? self::utf8_decode($input) : '';
  }
6358
6359
  /**
6360
   * alias for "UTF8::to_iso8859()"
6361
   *
6362
   * @see UTF8::to_iso8859()
6363
   *
6364
   * @param string|string[] $str
6365
   *
6366
   * @return string|string[]
6367
   */
6368 1
  public static function to_latin1($str)
  {
    // Alias: the argument is forwarded unchanged to "to_iso8859()".
    $converted = self::to_iso8859($str);

    return $converted;
  }
6372
6373
  /**
6374
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
6375
   *
6376
   * <ul>
6377
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
6378
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
6379
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
6380
   * case.</li>
6381
   * </ul>
6382
   *
6383
   * @param string|string[] $str                    <p>Any string or array.</p>
6384
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
6385
   *
6386
   * @return string|string[] <p>The UTF-8 encoded string.</p>
6387
   */
6388 22
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // Arrays are converted entry by entry (recursively), keeping their keys.
    if (\is_array($str) === true) {
      return \array_map(
          static function ($item) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $out = '';

    // Walk over the input byte by byte.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $lead = $str[$i];

      // plain 7-bit byte: it doesn't need conversion
      if ($lead < "\x80") {
        $out .= $lead;
        continue;
      }

      // How many continuation bytes would a UTF-8 sequence with this lead byte have?
      if ($lead >= "\xC0" && $lead <= "\xDF") {
        $tail = 1; // looks like 2 bytes UTF8
      } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
        $tail = 2; // looks like 3 bytes UTF8
      } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
        $tail = 3; // looks like 4 bytes UTF8
      } else {
        $tail = 0; // stray continuation byte or a byte above 0xF7
      }

      // doesn't look like UTF8, but should be converted
      if ($tail === 0) {
        $out .= self::to_utf8_convert($lead);
        continue;
      }

      // Collect the expected continuation bytes (0x80 - 0xBF);
      // a byte behind the end of the input is treated as "\x00".
      $sequence = $lead;
      $isUtf8 = true;
      for ($k = 1; $k <= $tail; $k++) {
        $next = $i + $k >= $max ? "\x00" : $str[$i + $k];

        if ($next < "\x80" || $next > "\xBF") {
          $isUtf8 = false;
          break;
        }

        $sequence .= $next;
      }

      if ($isUtf8 === true) { // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $i += $tail;
      } else { // not valid UTF8 - convert only the lead byte
        $out .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences ("\uXXXX")
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
6481
6482
  /**
   * Convert one byte that is not part of a valid UTF-8 sequence into UTF-8.
   *
   * The byte is first looked up in the Windows-1252 special-case table; when there is no
   * entry for it, a two-byte UTF-8 sequence is built from the byte value itself.
   *
   * @param string $int <p>
   *                    The byte to convert, as a one-byte string. The historical parameter name is
   *                    misleading: the value is used as a key of the char-to-ordinal table and with
   *                    string bitwise operators, so it must be a string, not an integer.
   *                    </p>
   *
   * @return string <p>The UTF-8 representation of the given byte.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Build the two-byte sequence by hand. The lead byte uses an integer shift instead of
    // "/ 64": the division yields a float whenever the value is not a multiple of 64, and a
    // float array key is silently truncated (deprecated since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6515
6516
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used if the string and the chars were 7-bit (ASCII) only,
   * but checking for ASCII costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>
   *                The trimmed string. An empty string is returned for empty input, and also when
   *                the default (whitespace) trimming fails inside PCRE.
   *                </p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8 together with the "u"
      // modifier). Without the cast that null would violate the declared string return type
      // and raise a TypeError; the cast mirrors how urldecode() treats preg_replace().
      return (string)\preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6542
6543
  /**
   * Upper-case the first character of a string; the rest of the string is left as it is.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything behind the first character; substr() may report a failure with false
    $tail = self::substr($str, 1, null, $encoding);

    // the first character on its own
    $head = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($head, $encoding, $cleanUtf8) . ($tail === false ? '' : $tail);
  }
6573
6574
  /**
   * Alias of "UTF8::ucfirst()": all arguments are handed over unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6589
6590
  /**
   * Uppercase for all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty piece that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()", and the pieces are glued together again
   * without any separator. Pure ASCII input without $charlist / $exceptions is delegated to the
   * native "ucwords()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // isset() instead of a truthiness test: the string "0" is valid input, not "empty"
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // The native function is only an option when neither a charlist nor exceptions were given.
    // Compared against '' (not cast to bool) so that a charlist / exception of "0" still counts.
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip empty pieces only. A piece that is exactly "0" must survive, because the pieces
      // are re-joined without a separator and dropping it would delete text from the result.
      if (!isset($word[0])) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6658
6659
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each pass runs to_utf8() -> html_entity_decode() -> native urldecode() -> fix_simple_utf8().
   * With $multi_decode the pass is repeated until the string no longer changes.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into numeric html entities, the loop below resolves them
    $percentUnicodeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($percentUnicodeRx, $str)) {
      $str = (string)\preg_replace($percentUnicodeRx, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      // remember the input of this pass to detect when nothing changes anymore
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return $str;
  }
6707
6708
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The table covers "%20" up to "%FF". Bytes that Windows-1252 leaves undefined
   * (0x81, 0x8D, 0x8F, 0x90 and 0x9D) are mapped to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Percent-encoded byte (e.g. "%FC") as key, UTF-8 character as value.</p>
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    return [
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        // DEL: written as an escape so the invisible control character cannot get lost
        '%7F' => "\x7F",
        // 0x80 is the euro sign in Windows-1252 (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        // NO-BREAK SPACE: written as an escape so the invisible character cannot get lost
        '%A0' => "\u{00A0}",
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        // SOFT HYPHEN: written as an escape so the invisible character cannot get lost
        '%AD' => "\u{00AD}",
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6944
6945
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Two-byte sequences are collapsed into one byte when their code point is below 256.
   * Everything else that starts a multi-byte sequence (three- and four-byte sequences,
   * two-byte sequences with a code point of 256 or more, a two-byte lead byte cut off at the
   * end of the input) is replaced by "?". All other bytes are copied unchanged.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the string as it was before the byte-wise decoding is
   *                              returned whenever the decoded result is not shorter than it
   *                              (lengths measured via "UTF8::strlen()").
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the keys of the table are used as search terms and the values as
    // replacements - confirm against the data file that this is the intended direction.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position; the string is rewritten in place,
    // which is safe because $j never runs ahead of $i
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // a lead byte at the very end has no continuation byte: do not read past the string
          if ($i + 1 >= $len) {
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // four-byte sequence: skip one extra byte, then ...
          ++$i;
          // ... intentional fall-through to the three-byte handling
        case "\xE0":
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // cut off the leftover bytes behind the write position
    $return = (string)self::substr($str, 0, $j, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7026
7027
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The native "utf8_encode()" does the conversion. If its result contains the byte 0xC2,
   * the keys of the "win1252_to_utf8" table are additionally replaced by the table's values.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode(string $str): string
  {
    // search / replace lists, built once per request from the "win1252_to_utf8" table
    static $searchCache = null;
    static $replaceCache = null;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // nothing to post-process without a 0xC2 byte
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($searchCache, $replaceCache, $encoded);
  }
7067
7068
  /**
   * Fix utf8-win1252 chars; a plain forwarder to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7081
7082
  /**
   * Returns the class-internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7098
7099
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Appended to the result when the string had to be shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input when
   *                nothing had to be cut, otherwise the right-trimmed first words plus $strAddOn.
   *                </p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // leading whitespace, then at most $limit runs of "non-whitespace + trailing whitespace"
    $firstWordsRx = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($firstWordsRx, $str, $found);

    // only shortened if there was a match and that match does not already cover the whole string
    $wasShortened = isset($found[0]) && self::strlen($str) !== self::strlen($found[0]);

    if ($wasShortened === false) {
      return $str;
    }

    return self::rtrim($found[0]) . $strAddOn;
  }
7130
7131
  /**
   * Wraps a string to a given number of characters
   *
   * The native "wordwrap()" is run on a placeholder string that has one single-byte placeholder
   * per character of the input; the break positions it produces are then transferred back onto
   * the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $glyphs holds the real characters (and the existing breaks) in order,
    // $shadow holds one placeholder byte for each entry of $glyphs:
    // "#" for an existing break, " " for a space, "?" for anything else
    $shadow = '';
    $glyphs = [];

    foreach (\explode($break, $str) as $lineNo => $line) {

      // every line but the first one is preceded by a break in the original string
      if ($lineNo) {
        $glyphs[] = $break;
        $shadow .= '#';
      }

      foreach (self::split($line) as $glyph) {
        $glyphs[] = $glyph;
        $shadow .= ($glyph === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go
    $shadow = \wordwrap($shadow, $width, '#', $cut);

    $wrapped = '';
    $glyphIdx = 0;   // next unconsumed entry of $glyphs
    $shadowPos = -1; // position inside the wrapped placeholder string
    $breakPos = -1;  // position of the most recent "#"

    while (false !== ($breakPos = self::strpos($shadow, '#', $breakPos + 1))) {

      // copy the real characters that belong in front of this break
      for (++$shadowPos; $shadowPos < $breakPos; ++$shadowPos) {
        $wrapped .= $glyphs[$glyphIdx];
        unset($glyphs[$glyphIdx++]);
      }

      // the break replaces an existing break or a space: drop that entry
      if ($break === $glyphs[$glyphIdx] || ' ' === $glyphs[$glyphIdx]) {
        unset($glyphs[$glyphIdx++]);
      }

      $wrapped .= $break;
    }

    // whatever was not consumed belongs behind the last break
    return $wrapped . \implode('', $glyphs);
  }
7195
7196
  /**
   * Returns the class-internal array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7205
7206
}
7207