Completed
Push — master ( 272386...350895 )
by Lars
12:46
created

UTF8::to_utf8_convert()   B

Complexity

Conditions 5
Paths 16

Size

Total Lines 28
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 5

Importance

Changes 0
Metric Value
dl 0
loc 28
ccs 15
cts 15
cp 1
rs 8.439
c 0
b 0
f 0
cc 5
eloc 16
nc 16
nop 1
crap 5
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
   * Bom => Byte-Length
   *
   * Every key is written as explicit byte escapes, so the table does not depend on
   * the encoding this source file is saved in. The "as WINDOWS-1252" rows are the
   * UTF-8 bytes you get when the raw BOM bytes are misread as WINDOWS-1252 and then
   * re-encoded as UTF-8 (one char has [maybe] more than one byte).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * NOTE(review): the two UTF-32 "as WINDOWS-1252" keys keep the two 0x20 (space)
   * bytes of the previous literal; a misread 0x00 would stay 0x00 - confirm which
   * one is intended.
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252"
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x20\x20\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x20\x20" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Creates an instance and triggers the (cached) environment support detection.
   */
  public function __construct()
  {
    // The detection itself is a no-op once it has run.
    self::checkForSupport();
  }
212
213
  /**
   * Picks one character out of a string by its position: $str[1] like functionality.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for an empty input
   *                or a negative position.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // Neither an empty string nor a negative position can yield a character.
    if ($pos < 0 || !isset($str[0])) {
      return '';
    }

    $character = self::substr($str, $pos, 1);

    return (string)$character;
  }
233
234
  /**
   * Returns the string with a leading UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    // Already marked: nothing to prepend.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Builds a copy of an array whose keys are run through UTF8::strtolower() or UTF8::strtoupper().
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); every other value is
   *                     handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array <p>An array with its keys lower or uppercased.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Only an explicit CASE_UPPER selects upper-casing; anything else falls back to lower-casing.
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $originalKey => $item) {
      $newKey = $toUpper ? self::strtoupper($originalKey) : self::strtolower($originalKey);

      // Keys that become equal after conversion overwrite each other (last one wins).
      $converted[$newKey] = $item;
    }

    return $converted;
  }
284
285
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it describes.
   *
   * The digits are read as one big-endian number: leading zero bits are ignored
   * (so leading NUL bytes are not reproduced), the remaining bits are left-padded
   * to a whole number of bytes and each group of 8 bits becomes one byte.
   * Characters other than "0" and "1" are ignored.
   *
   * The conversion works on 8-bit groups instead of base_convert(), so it neither
   * loses precision on long inputs nor mis-pads values whose hexadecimal form has
   * an odd number of digits (e.g. "1010" is "\x0a", not "\xa0").
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>The decoded bytes, or an empty string for empty, non-string or all-zero input.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0]) || \is_string($bin) === false) {
      return '';
    }

    // keep only real binary digits and drop the leading zeros
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', $bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8, so that every group is one complete byte
    $missingBits = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missingBits) . $bits;

    $bytes = '';
    foreach (\str_split($bits, 8) as $octet) {
      $bytes .= \chr((int)\bindec($octet));
    }

    return $bytes;
  }
305
306
  /**
   * Returns the three bytes of the UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
317
318
  /**
   * @alias of UTF8::chr_map()
   *
   * Forwards both arguments unchanged to UTF8::chr_map() and returns its result.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
332
333
  /**
   * Detects once which UTF-8 related extensions / features are available and
   * stores the result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection only runs once, later calls return immediately
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $intlLoaded = self::intl_loaded();
    self::$SUPPORT['intl'] = $intlLoaded;

    // the transliterator ids are only collected if "intl" provides the function
    $canListIds = $intlLoaded === true && \function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListIds ? transliterator_list_ids() : [];

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
369
370
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per code point + encoding in a function-local static cache.
   * Code points up to 127 are read from the "chr" data table, higher ones are built
   * by IntlChar::chr() when available, otherwise the UTF-8 bytes are assembled by hand.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.
   *                               A canonical decimal string (e.g. "8364") is handled like the integer.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, null if no character could be generated.</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // IntlChar::chr() treats a string argument as a character and not as a number,
    // so a numeric string must be turned into an integer before it gets there.
    // (the cache key below is the same text for "8364" and 8364)
    if (\is_string($code_point) === true && (string)(int)$code_point === $code_point) {
      $code_point = (int)$code_point;
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      // manual UTF-8 encoding: lead byte + 10xxxxxx continuation bytes
      if ($code_point <= 0x7FF) {
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }

    }

    // a failed lookup (null) is returned as it is, there is nothing to convert
    if ($chr !== null && $encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
459
460
  /**
   * Runs a callback on every element returned by UTF8::split() for the given string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
474
475
  /**
   * Lists the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, empty for an empty string.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    $sizes = [];
    foreach (self::split($str) as $index => $character) {
      // '8BIT' makes strlen() count bytes instead of characters
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
500
501
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character (1 - 4), the
   * payload bits of the lead byte and of the following continuation bytes are
   * then combined into one integer. Only the first character of the input is used.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code, 0 for an empty string.</p>
   */
  public static function chr_to_decimal(string $char): int
  {
    // an empty string has no first byte to inspect
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx (a byte that is missing in a truncated sequence adds no bits)
      $continuation = isset($char[$i]) ? (self::ord($char[$i]) & ~0x80) : 0;
      $code = ($code << 6) + $continuation;
    }

    return $code;
  }
539
540
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix that is handed over to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.<p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // the literal entity "&#0;" is looked up like an empty string
    $subject = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($subject);

    return self::int_to_hex($codePoint, $pfix);
  }
560
561
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * The argument is passed on unchanged and the result is returned as it is.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
574
575
  /**
   * Splits a string into smaller chunks via UTF8::split() and glues the chunks
   * together with the given line ending.
   *
   * INFO: The line ending is only placed between the chunks, not after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
588
589
  /**
   * Drops every byte that is not part of a UTF-8 shaped byte sequence and runs the
   * requested extra clean-up steps afterwards.
   *
   * Order of the optional steps: replace_diamond_question_mark(), remove_invisible_characters(),
   * normalize_whitespace(), normalize_msword(), remove_bom().
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, to pass the string through
   *                                              UTF8::remove_bom().</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, to pass the string through
   *                                              UTF8::normalize_whitespace().</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, to pass the string through
   *                                              UTF8::normalize_msword(), e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Handed over to UTF8::normalize_whitespace(),
   *                                              only used in combination with $normalize_whitespace.</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, to pass the string through
   *                                              UTF8::replace_diamond_question_mark() with an empty
   *                                              replacement, e.g. for "�".</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, to skip
   *                                              UTF8::remove_invisible_characters(), e.g. for "\0".</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 collects runs of well-formed sequences, group 2 and 3 catch single
    // stray bytes; replacing every match with group 1 removes the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string)\preg_replace($utf8Pattern, '$1', $str);

    if ($replace_diamond_question_mark === true) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters === true) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
649
650
  /**
   * Repairs simple encoding errors via UTF8::fix_simple_utf8() and then runs
   * UTF8::clean() with a fixed set of options.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, an empty string stays an empty string.</p>
   */
  public static function cleanup(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($str);

    // options for UTF8::clean(), in parameter order
    $removeBom = true;
    $normalizeWhitespace = true;
    $normalizeMsWord = false;
    $keepNonBreakingSpace = true;
    $replaceDiamondQuestionMark = true;
    $removeInvisibleCharacters = true;

    return self::clean(
        $repaired,
        $removeBom,
        $normalizeWhitespace,
        $normalizeMsWord,
        $keepNonBreakingSpace,
        $replaceDiamondQuestionMark,
        $removeInvisibleCharacters
    );
  }
683
684
  /**
   * Maps a string, or an array of strings, to Unicode code points.
   *
   * A string is first cut into pieces by UTF8::split(), an array is used as it is;
   * UTF8::ord() is then applied to every element.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, every code point is additionally passed through
   *                                    UTF8::int_to_hex() (U+xxxx format), by default the code
   *                                    points are returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $pieces = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $pieces);

    if ($u_style) {
      $codePoints = \array_map([self::class, 'int_to_hex'], $codePoints);
    }

    return $codePoints;
  }
721
722
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into pieces of length 1 by UTF8::split(), the pieces are
   * then counted with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Handed over to UTF8::split()
   *                          (remove non UTF-8 chars from the string).</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
735
736
  /**
   * Converts a int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;"), which is then
   * decoded by UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
747
748
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * Targets "UTF-8" and "ISO-8859-1" are delegated to UTF8::to_utf8() / UTF8::to_iso8859() when the
   * conversion is forced or the detected source encoding is UTF-8, WINDOWS-1252 or ISO-8859-1;
   * everything else goes through mb_convert_encoding().
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise the string is only converted if the detected encoding
   *                         differs from the requested one</p>
   *
   * @return string <p>The converted string, or the input string if nothing was converted.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // nothing to do without a string or without a target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // already in the requested encoding and no conversion forced
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // the dedicated converters are used if forced or for these detected source encodings
    $useDedicatedConverter = $force === true
                             || \in_array($encodingDetected, ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'], true);

    if ($encoding === 'UTF-8' && $useDedicatedConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useDedicatedConverter) {
      return self::to_iso8859($str);
    }

    $needsMbstring = $encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252';
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // a falsy conversion result is discarded in favour of the input string
    return $converted ? $converted : $str;
  }
832
833
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * NOTE: The filename is passed through filter_var() with FILTER_SANITIZE_STRING before it is used,
   *       so e.g. tags are stripped from it and quotes are encoded. This filter is deprecated as of
   *       PHP 8.1.
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Passed on to \file_get_contents(): set to true to also
   *                                        search for the file in the include path.
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If it is null and $timeout is not 0,
   *                                        a context with the "http" option "timeout" is created.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts, null is handled as 0.
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout, only used if no
   *                                        $context is given.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports a failed filter with false, there is no file to read then
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // binary data that is neither UTF-16 nor UTF-32 must stay untouched
      $isPlainBinary = self::is_binary($data, true) === true
                       &&
                       self::is_utf16($data) === false
                       &&
                       self::is_utf32($data) === false;

      if ($isPlainBinary === false) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
919
920
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // \file_get_contents() returns false on failure; passing that on to
    // string_has_bom(string) would raise a TypeError under strict_types.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
931
932
  /**
   * Normalizes strings to the requested Unicode normalization form (NFC by default).
   *
   * Arrays and objects are walked recursively; every other non-string value is returned as-is.
   * For strings, "\r\n" and "\r" are replaced by "\n" first. Non-ASCII strings that are not yet
   * normalized are normalized; if normalization yields nothing usable, the string is re-encoded
   * via self::encode('UTF-8', ...) instead.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Normalizer form constant, default 4 (NFC).</p>
   * @param string $leading_combining  <p>Prefix put in front of a string that starts with a combining mark.</p>
   *
   * @return mixed <p>The filtered value (same kind of value as the input).</p>
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (\is_object($var)) {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!\is_string($var)) {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (\strpos($var, "\r") !== false) {
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Pure ASCII needs no normalization at all.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: "already normalized" still allows the prefix check below.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
995
996
  /**
   * "filter_input()"-wrapper whose result is additionally passed through UTF8::filter().
   *
   * Gets a specific external variable by name and optionally filters it.
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>Name of a variable to get.</p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when it was passed explicitly as fourth argument.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // The native function is called without $options unless the caller really supplied it.
    $raw = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($raw);
  }
1036
1037
  /**
   * "filter_input_array()"-wrapper whose result is additionally passed through UTF8::filter().
   *
   * Gets external variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
   *                          to the filter) and "options" (options that apply to the filter).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise both
    // $definition and $add_empty are forwarded.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1084
1085
  /**
   * "filter_var()"-wrapper whose result is additionally passed through UTF8::filter().
   *
   * Filters a variable with a specified filter.
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>Value to filter.</p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        Only forwarded when it was passed explicitly as third argument.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filters that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // callback filter
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // The native function is called without $options unless the caller really supplied it.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1154
1155
  /**
   * "filter_var_array()"-wrapper whose result is additionally passed through UTF8::filter().
   *
   * Gets multiple variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter, flags and options.
   *                          If the value is an array, valid keys are "filter" (the filter type),
   *                          "flags" (flags that apply to the filter) and "options" (options that
   *                          apply to the filter).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set.
   * @since 5.2.0
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise both
    // $definition and $add_empty are forwarded.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1199
1200
  /**
   * Check that the length reported by UTF8::strlen() does not exceed the given limit.
   *
   * @param string $str      <p>The original string to be checked.</p>
   * @param int    $box_size <p>The size in number of chars to be checked against string.</p>
   *
   * @return bool <p><strong>true</strong> if the string length is less than or equal to $box_size,
   *              <strong>false</strong> otherwise.</p>
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1212
1213
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacement table is loaded from the "utf8_fix" data file on first use and then
   * kept in function-static variables.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every known broken sequence replaced.</p>
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // search / replace lists, filled once per request
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $fixTable = self::$BROKEN_UTF8_FIX;

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixTable);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixTable);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1247
1248
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The string is decoded and re-encoded repeatedly until a pass no longer changes it.
   * Arrays are processed element by element (recursively); any other non-string value
   * is cast to string first.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        $str[$k] = self::fix_utf8($v);
      }

      return $str;
    }

    // utf8_decode() only accepts strings; under strict_types a non-string scalar or
    // null would otherwise raise a TypeError inside the loop below.
    $str = (string)$str;

    $last = '';
    while ($last !== $str) {
      $last = $str;
      $str = self::to_utf8(
          self::utf8_decode($str, true)
      );
    }

    return $str;
  }
1276
1277
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is flagged in self::$SUPPORT, its direction class is consulted first;
   * when that class is not one of the known LTR/RTL classes (or IntlChar is not available),
   * the code point is looked up in a hard-coded table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      $directionClasses = [
          'LTR' => [0, 11, 12, 20],
          'RTL' => [1, 13, 14, 15, 21],
      ];

      foreach ($directionClasses as $label => $classIds) {
        if (\in_array($intlDirection, $classIds, true)) {
          return $label;
        }
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is treated as left-to-right.
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    if ($c <= 0x85e) {

      // single code points (compared strictly) and inclusive ranges of the lower block
      $singles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $ranges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif ($c === 0x200f) {

      return 'RTL';

    } elseif ($c >= 0xfb1d) {

      // single code points (compared strictly) and inclusive ranges of the upper block
      $singles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $ranges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two blocks (and not 0x200f): nothing listed as right-to-left
      return 'LTR';

    }

    if (\in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as list($from, $to)) {
      if ($from <= $c && $to >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1398
1399
  /**
   * Load a data file from "/data/*.php" (relative to this file's directory).
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Whatever the required file returns; false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1416
1417
  /**
   * Check for php-support.
   *
   * Runs self::checkForSupport() first if the support information has not been collected yet.
   *
   * @param string|null $key <p>Name of a single support entry, or null for all entries.</p>
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) entries are reported as null
    return self::$SUPPORT[$key] ?? null;
  }
1442
1443
  /**
   * Alias for "UTF8::string_has_bom()".
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>The result of UTF8::string_has_bom() for the given string.</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1458
1459
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with \hexdec() and then handed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1470
1471
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms (case-insensitive): "U+xxxx", "\uxxxx" or the bare digits,
   * with 4 to 6 hexadecimal digits.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hexadecimal digits are allowed; letters beyond "f" must be rejected
    // instead of being silently turned into 0 by intval().
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1492
1493
  /**
   * Alias for "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Forwarded unchanged to UTF8::html_entity_decode().</p>
   * @param string $encoding [optional] <p>Forwarded unchanged to UTF8::html_entity_decode().</p>
   *
   * @return string <p>The result of UTF8::html_entity_decode().</p>
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1508
1509
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * Uses mb_encode_numericentity() when available; otherwise every character returned
   * by UTF8::split() is encoded via UTF8::single_chr_html_encode().
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars only code points from 0x80 upwards are converted.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The conversion map consists of groups of exactly four integers
      // (start, end, offset, mask); a stray fifth element is rejected by PHP 8.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1555
1556
  /**
1557
   * UTF-8 version of html_entity_decode()
1558
   *
1559
   * The reason we are not using html_entity_decode() by itself is because
1560
   * while it is not technically correct to leave out the semicolon
1561
   * at the end of an entity most browsers will still interpret the entity
1562
   * correctly. html_entity_decode() does not convert entities without
1563
   * semicolons, so we are left with our own little solution here. Bummer.
1564
   *
1565
   * Convert all HTML entities to their applicable characters
1566
   *
1567
   * INFO: opposite to UTF8::html_encode()
1568
   *
1569
   * @link http://php.net/manual/en/function.html-entity-decode.php
1570
   *
1571
   * @param string $str      <p>
1572
   *                         The input string.
1573
   *                         </p>
1574
   * @param int    $flags    [optional] <p>
1575
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1576
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1577
   *                         <table>
1578
   *                         Available <i>flags</i> constants
1579
   *                         <tr valign="top">
1580
   *                         <td>Constant Name</td>
1581
   *                         <td>Description</td>
1582
   *                         </tr>
1583
   *                         <tr valign="top">
1584
   *                         <td><b>ENT_COMPAT</b></td>
1585
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1586
   *                         </tr>
1587
   *                         <tr valign="top">
1588
   *                         <td><b>ENT_QUOTES</b></td>
1589
   *                         <td>Will convert both double and single quotes.</td>
1590
   *                         </tr>
1591
   *                         <tr valign="top">
1592
   *                         <td><b>ENT_NOQUOTES</b></td>
1593
   *                         <td>Will leave both double and single quotes unconverted.</td>
1594
   *                         </tr>
1595
   *                         <tr valign="top">
1596
   *                         <td><b>ENT_HTML401</b></td>
1597
   *                         <td>
1598
   *                         Handle code as HTML 4.01.
1599
   *                         </td>
1600
   *                         </tr>
1601
   *                         <tr valign="top">
1602
   *                         <td><b>ENT_XML1</b></td>
1603
   *                         <td>
1604
   *                         Handle code as XML 1.
1605
   *                         </td>
1606
   *                         </tr>
1607
   *                         <tr valign="top">
1608
   *                         <td><b>ENT_XHTML</b></td>
1609
   *                         <td>
1610
   *                         Handle code as XHTML.
1611
   *                         </td>
1612
   *                         </tr>
1613
   *                         <tr valign="top">
1614
   *                         <td><b>ENT_HTML5</b></td>
1615
   *                         <td>
1616
   *                         Handle code as HTML 5.
1617
   *                         </td>
1618
   *                         </tr>
1619
   *                         </table>
1620
   *                         </p>
1621
   * @param string $encoding [optional] <p>Encoding to use.</p>
1622
   *
1623
   * @return string <p>The decoded string.</p>
1624
   */
1625 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // nothing to decode in an empty string
    if ($str === '') {
      return '';
    }

    // shorter than 4 bytes, examples: &; || &x;
    if (\strlen($str) < 4) {
      return $str;
    }

    // without an ampersand there cannot be any entity
    if (\strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity prefix nor a terminating semicolon
    if (\strpos($str, '&#') === false && \strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // fallback used when the caller passes no flags
    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    $needsMbstring = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // converts one decimal entity, but leaves entities that decode to a quote untouched
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
      $isQuote = ($decoded === '"' || $decoded === "'");

      return $isQuote ? $matches[0] : $decoded;
    };

    // repeat until a full pass no longer changes the string (handles nested encodings)
    do {
      $before = $str;

      $str = (string)\preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (the regex appends the missing ";" to unterminated numeric entities first)
      $terminated = \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = \html_entity_decode($terminated, $flags, $encoding);
    } while ($before !== $str);

    return $str;
  }
1693
1694
  /**
1695
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1696
   *
1697
   * @link http://php.net/manual/en/function.htmlentities.php
1698
   *
1699
   * @param string $str           <p>
1700
   *                              The input string.
1701
   *                              </p>
1702
   * @param int    $flags         [optional] <p>
1703
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1704
   *                              invalid code unit sequences and the used document type. The default is
1705
   *                              ENT_COMPAT | ENT_HTML401.
1706
   *                              <table>
1707
   *                              Available <i>flags</i> constants
1708
   *                              <tr valign="top">
1709
   *                              <td>Constant Name</td>
1710
   *                              <td>Description</td>
1711
   *                              </tr>
1712
   *                              <tr valign="top">
1713
   *                              <td><b>ENT_COMPAT</b></td>
1714
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1715
   *                              </tr>
1716
   *                              <tr valign="top">
1717
   *                              <td><b>ENT_QUOTES</b></td>
1718
   *                              <td>Will convert both double and single quotes.</td>
1719
   *                              </tr>
1720
   *                              <tr valign="top">
1721
   *                              <td><b>ENT_NOQUOTES</b></td>
1722
   *                              <td>Will leave both double and single quotes unconverted.</td>
1723
   *                              </tr>
1724
   *                              <tr valign="top">
1725
   *                              <td><b>ENT_IGNORE</b></td>
1726
   *                              <td>
1727
   *                              Silently discard invalid code unit sequences instead of returning
1728
   *                              an empty string. Using this flag is discouraged as it
1729
   *                              may have security implications.
1730
   *                              </td>
1731
   *                              </tr>
1732
   *                              <tr valign="top">
1733
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1734
   *                              <td>
1735
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1736
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1737
   *                              </td>
1738
   *                              </tr>
1739
   *                              <tr valign="top">
1740
   *                              <td><b>ENT_DISALLOWED</b></td>
1741
   *                              <td>
1742
   *                              Replace invalid code points for the given document type with a
1743
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1744
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1745
   *                              instance, to ensure the well-formedness of XML documents with
1746
   *                              embedded external content.
1747
   *                              </td>
1748
   *                              </tr>
1749
   *                              <tr valign="top">
1750
   *                              <td><b>ENT_HTML401</b></td>
1751
   *                              <td>
1752
   *                              Handle code as HTML 4.01.
1753
   *                              </td>
1754
   *                              </tr>
1755
   *                              <tr valign="top">
1756
   *                              <td><b>ENT_XML1</b></td>
1757
   *                              <td>
1758
   *                              Handle code as XML 1.
1759
   *                              </td>
1760
   *                              </tr>
1761
   *                              <tr valign="top">
1762
   *                              <td><b>ENT_XHTML</b></td>
1763
   *                              <td>
1764
   *                              Handle code as XHTML.
1765
   *                              </td>
1766
   *                              </tr>
1767
   *                              <tr valign="top">
1768
   *                              <td><b>ENT_HTML5</b></td>
1769
   *                              <td>
1770
   *                              Handle code as HTML 5.
1771
   *                              </td>
1772
   *                              </tr>
1773
   *                              </table>
1774
   *                              </p>
1775
   * @param string $encoding      [optional] <p>
1776
   *                              Like <b>htmlspecialchars</b>,
1777
   *                              <b>htmlentities</b> takes an optional third argument
1778
   *                              <i>encoding</i> which defines encoding used in
1779
   *                              conversion.
1780
   *                              Although this argument is technically optional, you are highly
1781
   *                              encouraged to specify the correct value for your code.
1782
   *                              </p>
1783
   * @param bool   $double_encode [optional] <p>
1784
   *                              When <i>double_encode</i> is turned off PHP will not
1785
   *                              encode existing html entities. The default is to convert everything.
1786
   *                              </p>
1787
   *
1788
   *
1789
   * @return string the encoded string.
1790
   * </p>
1791
   * <p>
1792
   * If the input <i>string</i> contains an invalid code unit
1793
   * sequence within the given <i>encoding</i> an empty string
1794
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1795
   * <b>ENT_SUBSTITUTE</b> flags are set.
1796
   */
1797 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Native htmlentities() leaves backslashes alone, so they are turned
    // into their numeric entity right after the native call.
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // the extra pass below is only done for UTF-8 output
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // collect every distinct character of 3 or more bytes together with its entity
    $needles = [];
    $entities = [];
    foreach (self::chr_size_list($encoded) as $charIndex => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($encoded, $charIndex);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return \str_replace($needles, $entities, $encoded);
  }
1835
1836
  /**
1837
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1838
   *
1839
   * INFO: Take a look at "UTF8::htmlentities()"
1840
   *
1841
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1842
   *
1843
   * @param string $str           <p>
1844
   *                              The string being converted.
1845
   *                              </p>
1846
   * @param int    $flags         [optional] <p>
1847
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1848
   *                              invalid code unit sequences and the used document type. The default is
1849
   *                              ENT_COMPAT | ENT_HTML401.
1850
   *                              <table>
1851
   *                              Available <i>flags</i> constants
1852
   *                              <tr valign="top">
1853
   *                              <td>Constant Name</td>
1854
   *                              <td>Description</td>
1855
   *                              </tr>
1856
   *                              <tr valign="top">
1857
   *                              <td><b>ENT_COMPAT</b></td>
1858
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1859
   *                              </tr>
1860
   *                              <tr valign="top">
1861
   *                              <td><b>ENT_QUOTES</b></td>
1862
   *                              <td>Will convert both double and single quotes.</td>
1863
   *                              </tr>
1864
   *                              <tr valign="top">
1865
   *                              <td><b>ENT_NOQUOTES</b></td>
1866
   *                              <td>Will leave both double and single quotes unconverted.</td>
1867
   *                              </tr>
1868
   *                              <tr valign="top">
1869
   *                              <td><b>ENT_IGNORE</b></td>
1870
   *                              <td>
1871
   *                              Silently discard invalid code unit sequences instead of returning
1872
   *                              an empty string. Using this flag is discouraged as it
1873
   *                              may have security implications.
1874
   *                              </td>
1875
   *                              </tr>
1876
   *                              <tr valign="top">
1877
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1878
   *                              <td>
1879
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1880
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1881
   *                              </td>
1882
   *                              </tr>
1883
   *                              <tr valign="top">
1884
   *                              <td><b>ENT_DISALLOWED</b></td>
1885
   *                              <td>
1886
   *                              Replace invalid code points for the given document type with a
1887
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1888
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1889
   *                              instance, to ensure the well-formedness of XML documents with
1890
   *                              embedded external content.
1891
   *                              </td>
1892
   *                              </tr>
1893
   *                              <tr valign="top">
1894
   *                              <td><b>ENT_HTML401</b></td>
1895
   *                              <td>
1896
   *                              Handle code as HTML 4.01.
1897
   *                              </td>
1898
   *                              </tr>
1899
   *                              <tr valign="top">
1900
   *                              <td><b>ENT_XML1</b></td>
1901
   *                              <td>
1902
   *                              Handle code as XML 1.
1903
   *                              </td>
1904
   *                              </tr>
1905
   *                              <tr valign="top">
1906
   *                              <td><b>ENT_XHTML</b></td>
1907
   *                              <td>
1908
   *                              Handle code as XHTML.
1909
   *                              </td>
1910
   *                              </tr>
1911
   *                              <tr valign="top">
1912
   *                              <td><b>ENT_HTML5</b></td>
1913
   *                              <td>
1914
   *                              Handle code as HTML 5.
1915
   *                              </td>
1916
   *                              </tr>
1917
   *                              </table>
1918
   *                              </p>
1919
   * @param string $encoding      [optional] <p>
1920
   *                              Defines encoding used in conversion.
1921
   *                              </p>
1922
   *                              <p>
1923
   *                              For the purposes of this function, the encodings
1924
   *                              ISO-8859-1, ISO-8859-15,
1925
   *                              UTF-8, cp866,
1926
   *                              cp1251, cp1252, and
1927
   *                              KOI8-R are effectively equivalent, provided the
1928
   *                              <i>string</i> itself is valid for the encoding, as
1929
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1930
   *                              the same positions in all of these encodings.
1931
   *                              </p>
1932
   * @param bool   $double_encode [optional] <p>
1933
   *                              When <i>double_encode</i> is turned off PHP will not
1934
   *                              encode existing html entities, the default is to convert everything.
1935
   *                              </p>
1936
   *
1937
   * @return string The converted string.
1938
   * </p>
1939
   * <p>
1940
   * If the input <i>string</i> contains an invalid code unit
1941
   * sequence within the given <i>encoding</i> an empty string
1942
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1943
   * <b>ENT_SUBSTITUTE</b> flags are set.
1944
   */
1945 1
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // only non-default encodings are passed through the normalizer
    $targetEncoding = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return \htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
1953
1954
  /**
1955
   * Checks whether iconv is available on the server.
1956
   *
1957
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1958
   */
1959 1
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already returns a bool, same form as intl_loaded()
    return \extension_loaded('iconv');
  }
1963
1964
  /**
1965
   * alias for "UTF8::decimal_to_chr()"
1966
   *
1967
   * @see UTF8::decimal_to_chr()
1968
   *
1969
   * @param mixed $int
1970
   *
1971
   * @return string
1972
   */
1973 2
  public static function int_to_chr($int): string
  {
    // alias: delegate to decimal_to_chr()
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
1977
1978
  /**
1979
   * Converts Integer to hexadecimal U+xxxx code point representation.
1980
   *
1981
   * INFO: opposite to UTF8::hex_to_int()
1982
   *
1983
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
1984
   * @param string $pfix [optional]
1985
   *
1986
   * @return string <p>The prefix followed by the hexadecimal code point, left-padded with zeros to at least 4 digits.</p>
1987
   */
1988 3
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // left-pad the hexadecimal digits with zeros to a minimum width of 4
    $paddedHex = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $paddedHex;
  }
1996
1997
  /**
1998
   * Checks whether intl-char is available on the server.
1999
   *
2000
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2001
   */
2002 1
  public static function intlChar_loaded(): bool
  {
    // availability is detected through the presence of the "IntlChar" class
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2006
2007
  /**
2008
   * Checks whether intl is available on the server.
2009
   *
2010
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2011
   */
2012 4
  public static function intl_loaded(): bool
  {
    // true when the "intl" extension is loaded
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2016
2017
  /**
2018
   * alias for "UTF8::is_ascii()"
2019
   *
2020
   * @see        UTF8::is_ascii()
2021
   *
2022
   * @param string $str
2023
   *
2024
   * @return boolean
2025
   *
2026
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2027
   */
2028 1
  public static function isAscii(string $str): bool
  {
    // deprecated alias: delegate to is_ascii()
    $result = self::is_ascii($str);

    return $result;
  }
2032
2033
  /**
2034
   * alias for "UTF8::is_base64()"
2035
   *
2036
   * @see        UTF8::is_base64()
2037
   *
2038
   * @param string $str
2039
   *
2040
   * @return bool
2041
   *
2042
   * @deprecated <p>use "UTF8::is_base64()"</p>
2043
   */
2044 1
  public static function isBase64(string $str): bool
  {
    // deprecated alias: delegate to is_base64()
    $result = self::is_base64($str);

    return $result;
  }
2048
2049
  /**
2050
   * alias for "UTF8::is_binary()"
2051
   *
2052
   * @see        UTF8::is_binary()
2053
   *
2054
   * @param mixed $str
2055
   * @param bool  $strict
2056
   *
2057
   * @return bool
2058
   *
2059
   * @deprecated <p>use "UTF8::is_binary()"</p>
2060
   */
2061 2
  public static function isBinary($str, $strict = false): bool
  {
    // deprecated alias: delegate to is_binary()
    $result = self::is_binary($str, $strict);

    return $result;
  }
2065
2066
  /**
2067
   * alias for "UTF8::is_bom()"
2068
   *
2069
   * @see        UTF8::is_bom()
2070
   *
2071
   * @param string $utf8_chr
2072
   *
2073
   * @return boolean
2074
   *
2075
   * @deprecated <p>use "UTF8::is_bom()"</p>
2076
   */
2077 1
  public static function isBom(string $utf8_chr): bool
  {
    // deprecated alias: delegate to is_bom()
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2081
2082
  /**
2083
   * alias for "UTF8::is_html()"
2084
   *
2085
   * @see        UTF8::is_html()
2086
   *
2087
   * @param string $str
2088
   *
2089
   * @return boolean
2090
   *
2091
   * @deprecated <p>use "UTF8::is_html()"</p>
2092
   */
2093 1
  public static function isHtml(string $str): bool
  {
    // deprecated alias: delegate to is_html()
    $result = self::is_html($str);

    return $result;
  }
2097
2098
  /**
2099
   * alias for "UTF8::is_json()"
2100
   *
2101
   * @see        UTF8::is_json()
2102
   *
2103
   * @param string $str
2104
   *
2105
   * @return bool
2106
   *
2107
   * @deprecated <p>use "UTF8::is_json()"</p>
2108
   */
2109
  public static function isJson(string $str): bool
  {
    // deprecated alias: delegate to is_json()
    $result = self::is_json($str);

    return $result;
  }
2113
2114
  /**
2115
   * alias for "UTF8::is_utf16()"
2116
   *
2117
   * @see        UTF8::is_utf16()
2118
   *
2119
   * @param string $str
2120
   *
2121
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2122
   *
2123
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2124
   */
2125 1
  public static function isUtf16(string $str)
  {
    // deprecated alias: delegate to is_utf16()
    $result = self::is_utf16($str);

    return $result;
  }
2129
2130
  /**
2131
   * alias for "UTF8::is_utf32()"
2132
   *
2133
   * @see        UTF8::is_utf32()
2134
   *
2135
   * @param string $str
2136
   *
2137
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2138
   *
2139
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2140
   */
2141 1
  public static function isUtf32(string $str)
  {
    // deprecated alias: delegate to is_utf32()
    $result = self::is_utf32($str);

    return $result;
  }
2145
2146
  /**
2147
   * alias for "UTF8::is_utf8()"
2148
   *
2149
   * @see        UTF8::is_utf8()
2150
   *
2151
   * @param string $str
2152
   * @param bool   $strict
2153
   *
2154
   * @return bool
2155
   *
2156
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2157
   */
2158 16
  public static function isUtf8($str, $strict = false): bool
  {
    // deprecated alias: delegate to is_utf8()
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2162
2163
  /**
2164
   * Checks if a string is 7 bit ASCII.
2165
   *
2166
   * @param string $str <p>The string to check.</p>
2167
   *
2168
   * @return bool <p>
2169
   *              <strong>true</strong> if it is ASCII<br>
2170
   *              <strong>false</strong> otherwise
2171
   *              </p>
2172
   */
2173 58
  public static function is_ascii(string $str): bool
  {
    // an empty string is considered ASCII
    if ($str === '') {
      return true;
    }

    // Look for any byte outside the accepted set: \x09, \x10, \x13, \x0A, \x0D and \x20-\x7E.
    // NOTE(review): \x10 and \x13 are DLE / DC3; possibly decimal 10 / 13 (LF / CR)
    // was meant, which is already covered by \x0A / \x0D. Confirm before changing.
    $hasOtherByte = (bool)\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasOtherByte;
  }
2181
2182
  /**
2183
   * Returns true if the string is base64 encoded, false otherwise.
2184
   *
2185
   * @param string $str <p>The input string.</p>
2186
   *
2187
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2188
   */
2189 1
  public static function is_base64(string $str): bool
  {
    // an empty string is never reported as base64
    if ($str === '') {
      return false;
    }

    // strict mode: base64_decode() returns false on characters outside the alphabet
    $decoded = \base64_decode($str, true);

    // Compare against false explicitly: a truthiness check would reject
    // valid input whose decoded payload is "0" (e.g. "MA==").
    // Re-encoding must reproduce the input exactly.
    return $decoded !== false && \base64_encode($decoded) === $str;
  }
2195
2196
  /**
2197
   * Check if the input is binary... (this is a heuristic and looks like a hack).
2198
   *
2199
   * @param mixed $input
2200
   * @param bool  $strict
2201
   *
2202
   * @return bool
2203
   */
2204 19
  public static function is_binary($input, bool $strict = false): bool
  {
    $bytes = (string)$input;
    if ($bytes === '') {
      return false;
    }

    // a string consisting only of "0" and "1" is treated as binary
    if (\preg_match('~^[01]+$~', $bytes)) {
      return true;
    }

    // more than 30% NUL bytes => binary
    $nullCount = \substr_count($bytes, "\x0");
    if (($nullCount / \strlen($bytes)) > 0.3) {
      return true;
    }

    // strict mode (only if finfo exists): rely solely on the detected mime encoding
    if ($strict === true && \class_exists('finfo')) {
      $detector = new \finfo(FILEINFO_MIME_ENCODING);

      return $detector->buffer($bytes) === 'binary';
    }

    // otherwise a single NUL byte is enough
    return $nullCount > 0;
  }
2247
2248
  /**
2249
   * Check if the file is binary.
2250
   *
2251
   * @param string $file
2252
   *
2253
   * @return boolean
2254
   */
2255 3
  public static function is_binary_file($file): bool
  {
    // Default for every failure path: an empty block, which is_binary()
    // reports as "not binary".
    $block = '';

    try {
      $fp = \fopen($file, 'rb');

      // fopen() signals failure by returning false (plus a warning), not by
      // throwing, so the handle has to be checked before it is used.
      if ($fp !== false) {
        try {
          // only the first 512 bytes are inspected; fread() may return false
          $block = (string)\fread($fp, 512);
        } finally {
          // always release the handle, even if fread() throws
          \fclose($fp);
        }
      }
    } catch (\Exception $e) {
      // kept for error handlers that convert warnings into exceptions
      $block = '';
    }

    return self::is_binary($block, true);
  }
2267
2268
  /**
2269
   * Checks if the given string is equal to any "Byte Order Mark".
2270
   *
2271
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2272
   *
2273
   * @param string $str <p>The input string.</p>
2274
   *
2275
   * @return bool <p><strong>true</strong> if $str is exactly a Byte Order Mark, <strong>false</strong> otherwise.</p>
2276
   */
2277 1
  public static function is_bom($str): bool
  {
    // the BOM table is keyed by the BOM byte sequence; strict comparison
    // means only an identical string matches
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2287
2288
  /**
2289
   * Check if the string contains any html-tags <lall>.
2290
   *
2291
   * @param string $str <p>The input string.</p>
2292
   *
2293
   * @return boolean
2294
   */
2295 1
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // opening or closing tag, optionally with attributes and a self-closing slash
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // preg_match() returns 1 on a match, 0 on no match and false on error
    return \preg_match($tagPattern, $str) === 1;
  }
2308
2309
  /**
2310
   * Try to check if "$str" is a JSON string.
2311
   *
2312
   * @param string $str <p>The input string.</p>
2313
   *
2314
   * @return bool
2315
   */
2316 1
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // only a decoded object or array counts; scalars and null are rejected
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    return \json_last_error() === JSON_ERROR_NONE;
  }
2332
2333
  /**
2334
   * Check if the string is UTF-16.
2335
   *
2336
   * @param string $str <p>The input string.</p>
2337
   *
2338
   * @return int|false <p>
2339
   *                   <strong>false</strong> if it's not UTF-16,<br>
2340
   *                   <strong>1</strong> for UTF-16LE,<br>
2341
   *                   <strong>2</strong> for UTF-16BE.
2342
   *                   </p>
2343
   */
2344 10 View Code Duplication
  public static function is_utf16(string $str)
  {
    // only input that is_binary() flags as binary is examined further
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // character table of the raw input, computed lazily on first use
    $rawChars = [];

    // one score per candidate byte order
    $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // the candidate is only scored if a round trip is lossless
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($rawChars) === 0) {
        $rawChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $rawChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // equal scores: no decision possible
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2399
2400
  /**
2401
   * Check if the string is UTF-32.
2402
   *
2403
   * @param string $str
2404
   *
2405
   * @return int|false <p>
2406
   *                   <strong>false</strong> if it's not UTF-32,<br>
2407
   *                   <strong>1</strong> for UTF-32LE,<br>
2408
   *                   <strong>2</strong> for UTF-32BE.
2409
   *                   </p>
2410
   */
2411 8 View Code Duplication
  public static function is_utf32(string $str)
  {
    // only input that is_binary() flags as binary is examined further
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // character table of the raw input, computed lazily on first use
    $rawChars = [];

    // one score per candidate byte order
    $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // the candidate is only scored if a round trip is lossless
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      if (\count($rawChars) === 0) {
        $rawChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $rawChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // equal scores: no decision possible
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2466
2467
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * An array is valid when every element is valid; an empty string is always valid.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string (or array of strings) to be checked.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The "/u" shortcut is only usable when PCRE really supports UTF-8; without that
    // support the pattern cannot match, so the byte-wise check below is the fallback.
    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 !== (0xC0 & $in)) {
          // Not a continuation octet: the multi-octet sequence is incomplete.
          return false;
        }

        // Legal continuation: merge its six payload bits into the code point.
        $shift = ($mState - 1) * 6;
        $mUcs4 |= ($in & 0x0000003F) << $shift;

        /**
         * End of the multi-octet sequence. mUcs4 now contains the final
         * Unicode code point to be output
         */
        if (0 === --$mState) {
          /*
          * Check for illegal sequences and code points.
          */
          // From Unicode 3.1, non-shortest form is illegal
          if (
              (2 === $mBytes && $mUcs4 < 0x0080) ||
              (3 === $mBytes && $mUcs4 < 0x0800) ||
              (4 === $mBytes && $mUcs4 < 0x10000) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal.
              (($mUcs4 & 0xFFFFF800) === 0xD800) ||
              // Code points outside the Unicode range are illegal.
              ($mUcs4 > 0x10FFFF)
          ) {
            return false;
          }

          // initialize UTF8 cache
          $mState = 0;
          $mUcs4 = 0;
          $mBytes = 1;
        }
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence, which is not valid UTF-8.
    return $mState === 0;
  }
2625
2626
  /**
   * Decodes a JSON string.
   *
   * The input is passed through "UTF8::filter()" first and then handed to PHP's native "json_decode()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options, passed on unchanged.</p>
   *
   * @return mixed <p>Whatever the native "json_decode()" returns for the filtered input.</p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filtered = self::filter($json);

    return \json_decode($filtered, $assoc, $depth, $options);
  }
2670
2671
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" first and then handed to PHP's native "json_encode()".
   *
   * INFO: no native return type is declared on purpose: the native function returns
   *       <b>FALSE</b> on failure, and a "string" return type would turn that documented
   *       result into a TypeError under strict types.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode options (e.g. <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>), passed on unchanged.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $value = self::filter($value);

    return \json_encode($value, $options, $depth);
  }
2715
2716
  /**
   * Makes string's first char lowercase.
   *
   * The string is split into its first character and the remainder; only the first
   * character is lower-cased via "UTF8::strtolower()".
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // everything after the first character stays untouched
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
2740
2741
  /**
   * Alias for "UTF8::lcfirst()": forwards all arguments unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $lowerCased = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowerCased;
  }
2756
2757
  /**
   * Lowercase the first character of all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty part that is not listed
   * in $exceptions is passed through "UTF8::lcfirst()" and the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: a loose check would also discard the string "0"
    if ($str === '') {
      return '';
    }

    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty parts only; a word like "0" must survive
      if ((string)$word === '') {
        continue;
      }

      // with an empty exception list "in_array()" is always false, so every word is lower-cased
      if (!\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
2806
2807
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without a char list, leading characters of the Unicode categories "Z" (separators)
   * and "C" (other / control) are removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // The string "0" is a valid char list, although PHP treats it as falsy.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      return \preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return \preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
2828
2829
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,<br>
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function max($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // "\max()" must not be called with an empty array
    if (\count($codepoints) === 0) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
2844
2845
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>The largest entry of "UTF8::chr_size_list()", or 0 if that list is empty.</p>
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
2862
2863
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2878
2879 1
  /**
   * Checks whether the "mbstring.func_overload" ini setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> if the constant MB_OVERLOAD_STRING is not defined or the bit is not set.</p>
   */
  private static function mbstring_overloaded(): bool
  {
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    return (bool)(\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2885
2886
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,<br>
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function min($arg): string
  {
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // "\min()" must not be called with an empty array
    if (\count($codepoints) === 0) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
2901
2902
  /**
   * Alias for "UTF8::normalize_encoding()": forwards both arguments unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2918
2919
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input (fallback), the two UTF-8 spellings, the per-request cache,
   * the list of known encodings, and finally the alias table below. A name that matches
   * nothing is returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by
   *                default)</p>
   */
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    if (!$encoding) {
      return $fallback;
    }

    // fast path for the most common input
    if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // an already canonical name is returned as it is
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $upperCased = \strtoupper($encoding);
    // alias key: the upper-cased name without anything but letters, digits and whitespace
    $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $aliases = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // unknown names fall through as the upper-cased input
    $normalized = isset($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperCased;

    // cache under the name exactly as the caller wrote it
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3036
3037
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table found in the string is replaced by its value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    // search / replace lists, built once per request from the data table
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if (!isset($str[0])) {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $table = self::$UTF8_MSWORD;
      $UTF8_MSWORD_KEYS_CACHE = \array_keys($table);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($table);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
3065
3066
  /**
   * Normalize the whitespace.
   *
   * Every entry of the whitespace table is replaced by a plain space; unless asked
   * otherwise, the entries of the bidirectional-control table are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one search list per $keepNonBreakingSpace value (index 0 / 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if (!isset($str[0])) {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3108
3109
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per request, keyed by the original input plus the normalized encoding.
   *
   * @param string $chr      <p>The character of which to calculate code point.<p/>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    // the cache key uses the input as given, not the converted one
    $original = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert only if the normalized name is still something else than UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      // a falsy result falls through to the manual decoding below
      if ($intlCode) {
        $CHAR_CACHE[$cacheKey] = $intlCode;

        return $intlCode;
      }
    }

    // manual decoding from (at most) the first four bytes; unpack() indexes from 1
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all)
      $code = $lead;
    }

    $CHAR_CACHE[$cacheKey] = $code;

    return $code;
  }
3174
3175
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never sets variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    // success needs both: no parse failure and a non-empty result
    return $parsed !== false && !empty($result);
  }
3200
3201
  /**
   * Checks if the "/u" pattern modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "/u" modifier against an empty string;
   * errors of that call are silenced.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support(): bool
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @\preg_match('//u', '');

    return (bool)$matchResult;
  }
3211
3212
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: a string of decimal digits is taken as a number,
   * a string of hexadecimal digits is converted via "UTF8::hex_to_int()", anything else
   * is treated as a character and resolved via "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    // shared resolver for both boundaries
    $toCodePoint = static function ($boundary): int {
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    $codePoints = \range($start, $end);

    return \array_map([self::class, 'chr'], $codePoints);
  }
3258
3259
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Rewrite "%uXXXX" escapes as hexadecimal HTML entities so the entity decoder below handles them.
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapeRx, $str)) {
      $str = (string)\preg_replace($unicodeEscapeRx, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // Decode at least once; with $multi_decode keep going until a pass changes nothing.
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
3307
3308
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The value returned by "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3323
3324
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Every known BOM is tried in turn, so more than one leading BOM can be stripped.
    foreach (self::$BOM as $bomString => $bomByteLength) {
      // Skip unless the string begins (byte offset 0) with this BOM.
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $withoutBom = self::substr($str, $bomByteLength, null, '8BIT');
      $str = $withoutBom === false ? '' : (string)$withoutBom;
    }

    return $str;
  }
3349
3350
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Each run of one or more consecutive occurrences of a search string is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Replace with the captured group rather than with $item itself: $item is literal text,
        // but as a replacement string sequences like "$0" or "\1" would be read as backreferences.
        $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3373
3374
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the URL-encoded ("%XX") forms of the control characters.</p>
   * @param string $replacement <p>The string every match is replaced with.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits in percent-encoding are case-insensitive, so "%0B" must be caught as well as "%0b".
      $non_displayables[] = '/%0[0-8bcefBCEF]/'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-fA-F]/'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass replaces nothing: a removal can join fragments into a new match.
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3407
3408
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // An empty replacement is passed to mbstring as the keyword "none".
      $substitute = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Swap the substitute character for the duration of the UTF-8 -> UTF-8 conversion, then restore it.
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);

      $str = \is_string($converted) ? $converted : '';
    }

    // Both search entries are mapped to the same replacement.
    return str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        $replacementChar,
        $str
    );
  }
3457
3458
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>
   *                The string with unwanted characters stripped from the right.
   *                If the regular expression fails, the input is returned unchanged.
   *                </p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string '0' is falsy in PHP but is a legitimate character to strip,
    // so it must not fall into the "no chars given" branch.
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      $pattern = '/[\pZ\pC]+$/u';
    } else {
      $pattern = '/' . self::rxClass($chars) . '+$/u';
    }

    // preg_replace() yields null on failure (e.g. invalid UTF-8 subject with the "u" modifier);
    // returning null would violate the declared string return type.
    $trimmed = \preg_replace($pattern, '', $str);

    return $trimmed ?? $str;
  }
3479
3480
  /**
   * Build a regular-expression fragment that matches any of the characters in $s.
   *
   * Results are memoized per "$s . $class" in a function-static cache.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string <p>A "[...]" character class, or a "(?:...|...)" group when alternatives were collected.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $cache = [];

    $cacheKey = $s . $class;

    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    // $parts[0] collects the character-class content; any further entries become alternatives.
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ($char === '-') {
        // A hyphen goes to the front of the class content.
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // At most two bytes long: quote for use inside a "/"-delimited pattern.
        $parts[0] .= \preg_quote($char, '/');
      } elseif (self::strlen($char) === 1) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $rx = \count($parts) === 1
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $cache[$cacheKey] = $rx;

    return $rx;
  }
3528
3529
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of self::$SUPPORT as "key - value" inside a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $report . '</pre>';
  }
3544
3545
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if (!isset($char[0])) {
      return '';
    }

    // ASCII input is returned untouched when the caller asked to keep it.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $targetEncoding = $encoding !== 'UTF-8'
        ? self::normalize_encoding($encoding, 'UTF-8')
        : $encoding;

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
3574
3575
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if (!isset($str[0])) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = [];

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "u" makes "." match a whole code point, "s" lets it match newlines too.
      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk the bytes and group them by the UTF-8 lead-byte bit patterns

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single-byte character
          $chars[] = $lead;

        } elseif (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx: two-byte sequence, kept only if followed by a 10xxxxxx byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

        } elseif (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx: three-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

        } elseif (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx: four-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }

        }
      }
    }

    // Group the single characters into chunks of $length characters.
    if ($length > 1) {
      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
3692
3693
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str, true) === true) {

      // Evaluate each detector once and branch on its result.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // Iterate only when the encoding list is actually an array.
    if (\is_array(self::$ENCODINGS) === true) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // Compare the converted bytes directly: iconv() returns false on failure, and hashing
        // that value with md5() would raise a TypeError under strict_types.
        if ($roundTrip === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
3798
3799
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty.</p>
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Take as many trailing bytes as the needle is long and compare them byte for byte.
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
3819
3820
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty.</p>
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare the trailing bytes (as many as the needle has) case-insensitively.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3840
3841
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a case-insensitive, unicode-aware literal pattern.
    $patterns = \array_map(
        static function ($needle) {
          $needle = (string)$needle;

          if ($needle === '') {
            // A pattern that can never match: an empty needle replaces nothing.
            return '/^(?<=.)$/';
          }

          return '/' . \preg_quote($needle, '/') . '/ui';
        },
        (array)$search
    );

    // Escape "\" and "$" so preg_replace() inserts the replacement literally
    // instead of interpreting "$1" / "\1" as backreferences.
    $escape = static function ($replacement): string {
      return \addcslashes((string)$replacement, '\\$');
    };
    $replacements = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $subject = \preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
3884
3885
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty.</p>
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // A case-insensitive match at position 0 means the haystack starts with the needle.
    return self::stripos($haystack, $needle) === 0;
  }
3905
3906
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters.</p>
   * @param string $strAddOn <p>The string appended to a shortened result.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Short enough already: nothing to cut, nothing to append.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The limit falls right behind a space: cut in front of that space.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Otherwise cut at the limit and drop the last space-separated piece.
    $truncated = (string)self::substr($str, 0, $length);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord === '') {
      // No space before the limit: hard cut one character early.
      return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3942
3943
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                <strong>Returns the padded string</strong>; the input is returned unchanged if
   *                $pad_length is not positive, is smaller than the string length, or if
   *                $pad_string is empty.
   *                </p>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    $str_length = self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // An empty pad string cannot fill anything and would cause a division by zero below.
    if (!$ps_length) {
      return $str;
    }

    // Builds exactly $chars characters of padding by repeating and then cutting the pad string.
    $makePadding = static function (int $chars) use ($pad_string, $ps_length): string {
      if ($chars <= 0) {
        return '';
      }

      $repeated = \str_repeat($pad_string, (int)\ceil($chars / $ps_length));

      return (string)self::substr($repeated, 0, $chars);
    };

    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = $makePadding($diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // Integer halves: the left side gets the smaller one, the right side the remainder.
        $left = (int)\floor($diff / 2);
        $pre = $makePadding($left);
        $post = $makePadding($diff - $left);
        break;

      case STR_PAD_RIGHT:
      default:
        $pre = '';
        $post = $makePadding($diff);
    }

    return $pre . $str . $post;
  }
3997
3998
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4022
4023
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Plain delegation to the native function; $count is filled in by reference.
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4056
4057
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The term to insert instead.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject with the first occurrence replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $pos = self::strpos($subject, $search);

    if ($pos === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $pos, $searchLength);
  }
4076
4077
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle(string $str): string
  {
    // Shuffle the character list produced by self::split(), then glue it back together.
    $chars = self::split($str);

    \shuffle($chars);

    return \implode('', $chars);
  }
4092
4093
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice leaves exactly one entry per distinct code point.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if ($desc) {
      \arsort($codePoints);
    } else {
      \asort($codePoints);
    }

    return self::string($codePoints);
  }
4118
4119
  /**
   * Split a string into an array of chunks, each holding "$len" grapheme clusters.
   *
   * If an array is passed, every entry is split on its own and an array
   * of arrays is returned (the keys of the input array are kept).
   *
   * @param string|string[] $str <p>The input string, or an array of input strings.</p>
   * @param int             $len <p>Number of grapheme clusters per chunk; values below 1 are
   *                             passed on to PHP's native "str_split()".</p>
   *
   * @return array <p>The chunks; an empty array for an empty input string.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    // array input: split every entry recursively
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if (!isset($str[0])) {
      return [];
    }

    // let the native function deal with (and complain about) an invalid length
    if ($len < 1) {
      return \str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // glue every group of "$len" consecutive clusters into one chunk
    $chunks = [];
    foreach (\array_chunk($graphemes, $len) as $group) {
      $chunks[] = \implode('', $group);
    }

    return $chunks;
  }
4167
4168
  /**
   * Tell whether "$haystack" begins with "$needle" (byte-wise, case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if the haystack or the needle is empty.</p>
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    // an empty haystack or an empty needle never counts as a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    return \strpos($haystack, $needle) === 0;
  }
4188
4189
  /**
   * Get a binary representation ("0" / "1" digits) of the bytes of a string.
   *
   * The bytes are read as one big-endian number; leading zero bits are
   * dropped, so e.g. "a" (0x61) gives "1100001". An empty string, or a string
   * consisting of NUL bytes only, gives "0".
   *
   * INFO: the conversion is done byte by byte, because "base_convert()" works
   * on floats internally and loses precision for inputs longer than a few bytes.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The bits of the input, without leading zeros.</p>
   */
  public static function str_to_binary(string $str): string
  {
    $bits = '';

    // two hex digits == one byte == eight bits
    foreach (\str_split(\bin2hex($str), 2) as $hexByte) {
      $bits .= \sprintf('%08b', \hexdec($hexByte));
    }

    // keep the former output format: no leading zeros, but never an empty result
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4202
4203
  /**
   * Convert a string into an array of words and the text between them.
   *
   * The string is split with "PREG_SPLIT_DELIM_CAPTURE", so the result
   * alternates between non-word parts (even indexes) and words (odd indexes),
   * unless entries are filtered out via the optional parameters.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove entries that are empty after "trim()".</p>
   * @param null|int $removeShortValues <p>If set, remove entries whose length is lower than or equal to this
   *                                    value.</p>
   *
   * @return array
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested -> return the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = [];
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && \trim($part) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4258
4259
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4274
4275
  /**
   * Count the words of a UTF-8 string, or return the words themselves.
   *
   * The work is done by UTF8::str_to_words(), whose result holds the words
   * at the odd indexes and the text between them at the even indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words (see "$format").</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
      // plain list of words
      $words = [];
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      // words keyed by their character offset within the input
      $words = [];
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // every other format: only the number of words
    return ($partCount - 1) / 2;
  }
4318
4319
  /**
   * Case-insensitive string comparison.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared with UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4337
4338
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle <p>Passed through to UTF8::strstr().</p>
   * @param string $encoding      <p>Passed through to UTF8::strstr().</p>
   * @param bool   $cleanUtf8     <p>Passed through to UTF8::strstr().</p>
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
4355
4356
  /**
   * Case-sensitive string comparison.
   *
   * INFO: strings that are not byte-identical are brought into the
   *       normalization form "NFD" before they are compared
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    // byte-identical strings are equal, no normalization needed
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return \strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
4376
4377
  /**
   * Find the length of the initial segment that contains none of the chars of "$charList".
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The chars that end the segment.</p>
   * @param int    $offset   <p>Start position; together with "$length" passed on to UTF8::substr().</p>
   * @param int    $length   <p>Length of the part of "$str" that is examined.</p>
   *
   * @return int|null <p>The length of the segment; <strong>null</strong> if "$charList" is empty, if the
   *                  requested part of "$str" is empty, or if it can not be extracted.</p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    // restrict the search to the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }

      $str = (string)$part;
    }

    if (!isset($str[0])) {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $match)) {
      return self::strlen($match[1]);
    }

    // no char from the list found -> the whole string is the segment
    return self::strlen($str);
  }
4411
4412
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle <p>Passed through to UTF8::stristr().</p>
   * @param string $encoding      <p>Passed through to UTF8::stristr().</p>
   * @param bool   $cleanUtf8     <p>Passed through to UTF8::stristr().</p>
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
4429
4430
  /**
   * Build a string out of a list of code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>The results of UTF8::chr() for every entry, concatenated in the given order.</p>
   */
  public static function string(array $array): string
  {
    // every code point is converted by UTF8::chr()
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
4452
4453
  /**
   * Checks if the string starts with one of the known "BOM"s (Byte Order Marks).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    // the BOM byte sequences are the keys of the lookup table
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4470
4471
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped; handed over to PHP's "strip_tags()".
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // the clean-up is only done on request
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
4503
4504
  /**
   * Remove every whitespace character from the string.
   *
   * Everything that matches the class "[[:space:]]" in UTF-8 mode of the
   * regex engine is removed.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
4521
4522
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * INFO: "grapheme_stripos()" is used for UTF-8 if "intl" is available, otherwise "mb_stripos()"
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4570
4571
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (the search is case-insensitive).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found or if
   *                      haystack / needle is empty.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the clean-up removed all of it.
    // INFO: compare against '' explicitly, a loose "!$needle" would also
    //       treat the valid needle "0" as empty.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII can be handled by the native function
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first hit
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4646
4647
  /**
   * Get the string length, not the byte-length!
   *
   * The first backend that is available and that delivers a result is used,
   * in this order: "iconv" (only for non UTF-8 encodings without "mbstring"),
   * "mbstring", "iconv", "intl" (UTF-8 only), plain "strlen()" for ASCII
   * input, a regex based count and at last the "mb_strlen()" polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // these encodings are measured in bytes
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $usePlainStrlen = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;

        return $usePlainStrlen ? \strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      // no backend at all for a foreign encoding -> warn the caller
      if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      // "iconv" is the only backend for a foreign encoding
      if (self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4753
4754
  /**
   * Get string length in byte.
   *
   * INFO: if "mbstring.func_overload" is active, "mb_strlen()" with the
   *       "8BIT" encoding is used instead of plain "strlen()"
   *
   * @param string $str
   *
   * @return int
   */
  public static function strlen_in_byte(string $str): int
  {
    // make sure that the feature flags are known before they are read,
    // like the other methods that depend on them do
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \mb_strlen($str, '8BIT');
    }

    return \strlen($str);
  }
4771
4772
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared with UTF8::strnatcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4788
4789
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: strings that are not byte-identical are passed through
   *       UTF8::strtonatfold() and then compared with PHP's "strnatcmp()"
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // byte-identical strings are equal, no folding needed
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4807
4808
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared with UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4825
4826
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are cut via UTF8::substr() and then compared with UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the cast turns a failed "substr" (false) into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
4846
4847
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found
   *                      (or if one of the parameters is empty).</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $match)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $bytePos = \strpos($haystack, $match[0]);

    return \substr($haystack, $bytePos);
  }
4869
4870
  /**
   * Find position of first occurrence of string in a string.
   *
   * The first backend that is available and that finds the needle is used,
   * in this order: plain "strpos()" (CP850 only), "iconv" (only for non UTF-8
   * encodings without "mbstring"), "mbstring", "intl" (UTF-8 only), "iconv",
   * plain "strpos()" for ASCII input and at last a byte based search whose
   * result is converted into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or if haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: there is no special handling for integer needles (anymore),
    //       "$needle" is declared as string, so it can never be an integer here

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the haystack: turn it into the
    // absolute start position, so that the result is relative to the
    // beginning of the haystack and not to the cut-off tail.
    if ($offset < 0) {
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTail = (string)$haystackTmp;

    // byte position of the needle within the tail ...
    $pos = \strpos($haystackTail, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position within the whole haystack
    return $offset + self::strlen(\substr($haystackTail, 0, $pos));
  }
5009
5010
  /**
5011
   * Finds the last occurrence of a character in a string within another.
5012
   *
5013
   * @link http://php.net/manual/en/function.mb-strrchr.php
5014
   *
5015
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5016
   * @param string $needle        <p>The string to find in haystack</p>
5017
   * @param bool   $before_needle [optional] <p>
5018
   *                              Determines which portion of haystack
5019
   *                              this function returns.
5020
   *                              If set to true, it returns all of haystack
5021
   *                              from the beginning to the last occurrence of needle.
5022
   *                              If set to false, it returns all of haystack
5023
   *                              from the last occurrence of needle to the end,
5024
   *                              </p>
5025
   * @param string $encoding      [optional] <p>
5026
   *                              Character encoding name to use.
5027
   *                              If it is omitted, internal character encoding is used.
5028
   *                              </p>
5029
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5030
   *
5031
   * @return string|false The portion of haystack or false if needle is not found.
5032
   */
5033 1 View Code Duplication
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only non-default encoding names are passed through normalize_encoding().
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // On request, run both operands through self::clean() before searching.
    if ($cleanUtf8) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_strrchr()" is either the native function or provided via polyfill.
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5049
5050
  /**
5051
   * Reverses characters order in the string.
5052
   *
5053
   * @param string $str The input string
5054
   *
5055
   * @return string The string with characters in the reverse sequence
5056
   */
5057 4
  public static function strrev(string $str): string
  {
    // An empty string has nothing to reverse.
    if ($str === '') {
      return '';
    }

    // Cut the string into pieces via self::split(), flip their order and join them again.
    $pieces = self::split($str);
    $reversed = \array_reverse($pieces);

    return \implode('', $reversed);
  }
5065
5066
  /**
5067
   * Finds the last occurrence of a character in a string within another, case insensitive.
5068
   *
5069
   * @link http://php.net/manual/en/function.mb-strrichr.php
5070
   *
5071
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
5072
   * @param string $needle         <p>The string to find in haystack.</p>
5073
   * @param bool   $before_needle  [optional] <p>
5074
   *                               Determines which portion of haystack
5075
   *                               this function returns.
5076
   *                               If set to true, it returns all of haystack
5077
   *                               from the beginning to the last occurrence of needle.
5078
   *                               If set to false, it returns all of haystack
5079
   *                               from the last occurrence of needle to the end,
5080
   *                               </p>
5081
   * @param string $encoding       [optional] <p>
5082
   *                               Character encoding name to use.
5083
   *                               If it is omitted, internal character encoding is used.
5084
   *                               </p>
5085
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5086
   *
5087
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
5088
   */
5089 1 View Code Duplication
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Only non-default encoding names are passed through normalize_encoding().
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // On request, run both operands through self::clean() before searching.
    if ($cleanUtf8) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The case-insensitive search itself is delegated to "mb_strrichr()".
    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5104
5105
  /**
5106
   * Find position of last occurrence of a case-insensitive string.
5107
   *
5108
   * @param string $haystack  <p>The string to look in.</p>
5109
   * @param string $needle    <p>The string to look for.</p>
5110
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5111
   * @param string $encoding  [optional] <p>Set the charset.</p>
5112
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5113
   *
5114
   * @return int|false <p>
5115
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
5116
   *                   not found, it returns false.
5117
   *                   </p>
5118
   */
5119 1
  public static function strripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // NOTE: $needle is declared as "string", so the former
    // "(int)$needle === $needle" code-point conversion could never be
    // true; that unreachable branch has been removed.

    // an empty haystack or an empty needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn (but keep going) if the requested encoding can't be honoured
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare upper-cased copies with the
    // case-sensitive self::strrpos()

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
5167
5168
  /**
5169
   * Find position of last occurrence of a string in a string.
5170
   *
5171
   * @link http://php.net/manual/en/function.mb-strrpos.php
5172
   *
5173
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5174
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
5175
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5176
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5177
   *                              the end of the string.
5178
   *                              </p>
5179
   * @param string     $encoding  [optional] <p>Set the charset.</p>
5180
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5181
   *
5182
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
5183
   *                   is not found, it returns false.</p>
5184
   */
5185 10
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point and converted via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The default offset is "null", but "mb_strrpos()" and "grapheme_strrpos()"
    // expect an integer (passing null raises a TypeError under strict types),
    // so "no offset" is normalised to 0 once, up front.
    $offset = $offset ?? 0;

    // NOTE: the former "$encoding === true" fallback was dropped, because
    // $encoding is declared as "string" and can never be a boolean here.
    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn (but keep going) if the requested encoding can't be honoured
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // positive offset: only search the part after the skipped characters
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // negative offset: cut that many characters off the end,
      // the resulting position needs no correction afterwards
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    // byte position of the last match in the (possibly shortened) haystack
    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // measure the byte prefix before the match with self::strlen()
    // and add back the characters skipped at the start
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
5258
5259
  /**
5260
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5261
   * mask.
5262
   *
5263
   * @param string $str    <p>The input string.</p>
5264
   * @param string $mask   <p>The mask of chars</p>
5265
   * @param int    $offset [optional]
5266
   * @param int    $length [optional]
5267
   *
5268
   * @return int
5269
   */
5270 10
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // Restrict the subject to the requested window before measuring anything.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    // An empty subject or an empty mask yields a zero-length segment.
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // Match the longest leading run of characters taken from the mask.
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5286
5287
  /**
5288
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5289
   *
5290
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
5291
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
5292
   * @param bool   $before_needle  [optional] <p>
5293
   *                               If <b>TRUE</b>, strstr() returns the part of the
5294
   *                               haystack before the first occurrence of the needle (excluding the needle).
5295
   *                               </p>
5296
   * @param string $encoding       [optional] <p>Set the charset.</p>
5297
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5298
   *
5299
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
5300
   */
5301 2
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // An empty haystack or an empty needle can never match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // On request, run both operands through self::clean() before searching.
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: mbstring honours the requested encoding.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // Without mbstring only UTF-8 is handled; warn, but keep going.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Second choice: intl's grapheme function (used for UTF-8 only).
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first needle.
    $match = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $before = $match[1];

    return $before_needle ? $before : self::substr($haystack, self::strlen($before));
  }
5354
5355
  /**
5356
   * Unicode transformation for case-less matching.
5357
   *
5358
   * @link http://unicode.org/reports/tr21/tr21-5.html
5359
   *
5360
   * @param string $str        <p>The input string.</p>
5361
   * @param bool   $full       [optional] <p>
5362
   *                           <b>true</b>, replace full case folding chars (default)<br>
5363
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5364
   *                           </p>
5365
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
5366
   *
5367
   * @return string
5368
   */
5369 13
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // Function-static caches, filled on first use.
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    // Step 1: the limited, built-in folding table.
    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // Step 2 (optional): the full folding table loaded via self::getData().
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    // Step 3 (optional): cleaning happens after the replacements above.
    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    // Step 4: lowercase whatever is left.
    return self::strtolower($str);
  }
5401
5402
  /**
5403
   * Make a string lowercase.
5404
   *
5405
   * @link http://php.net/manual/en/function.mb-strtolower.php
5406
   *
5407
   * @param string      $str       <p>The string being lowercased.</p>
5408
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5409
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5410
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5411
   *
5412
   * @return string str with all alphabetic characters converted to lowercase.
5413
   */
5414 25 View Code Duplication
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the multibyte functions are not tolerant to invalid characters
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // language specific rules, e.g. "tr-Lower"
        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        // "transliterator_transliterate()" returns false on failure, which
        // must not leak through the "string" return type (TypeError);
        // in that case fall through to the generic "mb_strtolower()" below.
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }
      } else {
        \trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtolower($str, $encoding);
  }
5454
5455
  /**
5456
   * Generic case sensitive transformation for collation matching.
5457
   *
5458
   * @param string $str <p>The input string</p>
5459
   *
5460
   * @return string
5461
   */
5462 3
  private static function strtonatfold(string $str): string
  {
    // Decompose first (NFD), so that combining marks become separate code points.
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
      // "Normalizer::normalize()" returns false on failure;
      // there is nothing that could be folded then.
      return '';
    }

    // Strip all non-spacing marks (\p{Mn}); "preg_replace()" returns null on
    // error, hence the cast to satisfy the "string" return type.
    return (string)\preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5467
5468
  /**
5469
   * Make a string uppercase.
5470
   *
5471
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5472
   *
5473
   * @param string      $str       <p>The string being uppercased.</p>
5474
   * @param string      $encoding  [optional] <p>Set the charset.</p>
5475
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5476
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5477
   *
5478
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
5479
   */
5480 19 View Code Duplication
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    // init
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // the multibyte functions are not tolerant to invalid characters
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // language specific rules, e.g. "tr-Upper"
        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        // "transliterator_transliterate()" returns false on failure, which
        // must not leak through the "string" return type (TypeError);
        // in that case fall through to the generic "mb_strtoupper()" below.
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }
      } else {
        // the message used to name "UTF8::strtolower()" (copy & paste error)
        \trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
      }
    }

    return \mb_strtoupper($str, $encoding);
  }
5519
5520
  /**
5521
   * Translate characters or replace sub-strings.
5522
   *
5523
   * @link  http://php.net/manual/en/function.strtr.php
5524
   *
5525
   * @param string          $str  <p>The string being translated.</p>
5526
   * @param string|string[] $from <p>The string replacing from.</p>
5527
   * @param string|string[] $to   <p>The string being translated to.</p>
5528
   *
5529
   * @return string <p>
5530
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5531
   *                corresponding character in to.
5532
   *                </p>
5533
   */
5534 1
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Identical source and target: nothing would change.
    if ($from === $to) {
      return $str;
    }

    if ($to !== INF) {
      // Three-argument form: pair the pieces of $from and $to one by one.
      $search = self::str_split($from);
      $replace = self::str_split($to);
      $searchCount = \count($search);
      $replaceCount = \count($replace);

      // Surplus entries on the longer side are ignored.
      if ($searchCount !== $replaceCount) {
        $shared = \min($searchCount, $replaceCount);
        $search = \array_slice($search, 0, $shared);
        $replace = \array_slice($replace, 0, $shared);
      }

      return \strtr($str, \array_combine($search, $replace));
    }

    // Two-argument form: a string $from is removed from $str,
    // anything else is handed to the native strtr() as a replacement map.
    return \is_string($from) ? \str_replace($from, '', $str) : \strtr($str, $from);
  }
5565
5566
  /**
5567
   * Return the width of a string.
5568
   *
5569
   * @param string $str       <p>The input string.</p>
5570
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5571
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5572
   *
5573
   * @return int
5574
   */
5575 1
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    // Only non-default encoding names are passed through normalize_encoding().
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // iconv and mbstring are not tolerant to invalid encoding,
    // so the input is cleaned first when requested.
    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    // "mb_strwidth()" is either the native function or provided via polyfill.
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
5590
5591
  /**
5592
   * Get part of a string.
5593
   *
5594
   * @link http://php.net/manual/en/function.mb-substr.php
5595
   *
5596
   * @param string $str       <p>The string being checked.</p>
5597
   * @param int    $offset    <p>The first position used in str.</p>
5598
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
5599
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
5600
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5601
   *
5602
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
5603
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
5604
   *                      characters long, <b>FALSE</b> will be returned.</p>
5605
   */
5606 72
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // nothing can be cut out of an empty string
    if (!isset($str[0])) {
      return '';
    }

    // an explicit length of zero always yields an empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // whole string requested
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // From here on either $offset is non-zero or $length is a non-zero int.
    // The (possibly expensive) string length is only needed when an offset is given.
    $str_length = 0;
    if ($offset !== 0) {
      $str_length = self::strlen($str, $encoding);
    }

    // offset points directly behind the last character and no length was given
    // ($length can not be 0 here, so the strict NULL check is unambiguous)
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // offset behind the end of the string
    if ($offset !== 0 && $offset > $str_length) {
      return false;
    }

    // default: take everything up to the end of the string
    if ($length === null) {
      $length = $str_length;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut byte-wise via the native function, as long as mbstring does not overload it
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the encoding explicitly, otherwise "iconv.internal_encoding" decides how the bytes are interpreted
      $returnTmp = \iconv_substr($str, $offset, $length, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // for pure ASCII input, bytes and characters are the same thing
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
5710
5711
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * <p>If an offset or a length is given, <i>str1</i> is reduced to the requested part first and
   * <i>str2</i> is then cut down to the same number of characters as that part.</p>
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison.</p>
   * @param bool     $case_insensitivity [optional] <p>If TRUE, the comparison is done via
   *                                     "UTF8::strcasecmp()", otherwise via "UTF8::strcmp()".</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    $needsSlicing = ($offset !== 0 || $length !== null);

    if ($needsSlicing) {
      // a failed "substr()" (FALSE) is treated like an empty string
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      $otherPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($otherPart === false) ? '' : (string)$otherPart;
    }

    return $case_insensitivity === true
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5755
5756
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If only an offset is given, the length of the haystack
   *                           (in the given encoding) is used.
   *                           </p>
   * @param string $encoding   <p>Set the charset.</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty.</p>
   */
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the same encoding that is used for the "substr()" call below
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // reduce the haystack to the requested part, a failed "substr()" counts as empty string
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the matches of the quoted needle via PCRE in UTF-8 mode
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
5838
5839
  /**
   * Strips the prefix ($needle) from the beginning of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // no needle or no matching prefix: nothing to remove
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5867
5868
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // no needle or no matching suffix: nothing to remove
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
5896
5897
  /**
   * Strips the prefix ($needle) from the beginning of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // no needle or no matching prefix: nothing to remove
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
5925
5926
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * <p>If <i>str</i> is an array, the method calls itself for every element; scalar arguments are
   * repeated for every element and array arguments are cut down to the number of strings.</p>
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          <br><br>
   *                                          In array mode, non-integer or missing offsets are handled as 0.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, the replacing
   *                                          ends at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = \is_int($valueTmp) ? $valueTmp : 0;
        }
        unset($valueTmp);

        // "array_map()" fills shorter arrays with NULL, and a NULL offset is not a valid
        // integer for the recursive call: use 0 for every missing offset instead
        $offset = \array_pad($offset, $num, 0);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = \is_int($valueTmpV2) ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // single string: only the first replacement of an array is used
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // for an ASCII input string, byte offsets and character offsets are identical
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice on character level
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
6027
6028
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    // no needle or no matching suffix: nothing to remove
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6056
6057
  /**
   * Returns a case swapped version of the string.
   *
   * <p>Every non-whitespace character is converted to upper case; if that does not change the
   * character, its lower case version is used instead.</p>
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped. If the string can not be processed by the
   *                regular expression engine (e.g. invalid UTF-8), it is returned unchanged.</p>
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the regular expression below runs in UTF-8 mode ("u" modifier),
      // so invalid byte sequences are removed up front
      $str = self::clean($str);
    }

    $strSwappedCase = \preg_replace_callback(
        '/[\S]/u',
        static function ($match) use ($encoding) {
          $matchToUpper = UTF8::strtoupper($match[0], $encoding);

          // already upper case (or without case): use the lower case variant
          if ($match[0] === $matchToUpper) {
            return UTF8::strtolower($match[0], $encoding);
          }

          return $matchToUpper;
        },
        $str
    );

    // "preg_replace_callback()" returns NULL on error, which would violate the declared return type
    if ($strSwappedCase === null) {
      return $str;
    }

    return $strSwappedCase;
  }
6098
6099
  /**
   * Deprecated alias, forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the replacement for unknown characters.</p>
   * @param bool   $strict    <p>Passed on as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
6116
6117
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6132
6133
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6148
6149
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6164
6165
  /**
   * Convert a string into ASCII.
   *
   * <p>The string is cleaned first; every remaining non-ASCII character is decoded to its code
   * point and looked up in a transliteration table that is loaded per "bank" (code point >> 8)
   * via "getData()". Characters without a table entry are replaced by <i>unknown</i>.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache for the transliteration tables, indexed by bank
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        // HACK for issue from "transliterator_transliterate()"
        $str = \str_replace(
            'ℌ',
            'H',
            $str
        );

        // "transliterator_transliterate()" returns FALSE on failure: keep the current string in that case
        $transliterated = \transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if (\is_string($transliterated)) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset the code point, so that a value from a previous character is never reused
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no branch above was able to decode the character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the table, the lowest 8 bits the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference to the last array element
    unset($c);

    return \implode('', $chars);
  }
6330
6331
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      $converted = [];
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $input = (string)$str;

    // an empty input needs no decoding
    return $input === '' ? '' : self::utf8_decode($input);
  }
6355
6356
  /**
   * Alias, forwards the argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6369
6370
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        if (($lead & "\xC0") === "\x80") {
          // a continuation byte without a lead byte needs conversion
          $out .= self::to_utf8_convert($lead);
        } else {
          // it doesn't need conversion
          $out .= $lead;
        }

        $pos++;
        continue;
      }

      // number of continuation bytes a lead byte like this announces (0 = no valid lead byte)
      if ($lead <= "\xDF") {
        $trailing = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $trailing = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $trailing = 3; // looks like 4 bytes UTF8
      } else {
        $trailing = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the sequence, as long as every following byte is a continuation byte (0x80 - 0xBF)
      $sequence = $lead;
      $looksLikeUtf8 = $trailing > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $trailing; $k++) {
        $next = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $pos += $trailing + 1;
      } else {
        // not valid UTF8 - convert the lead byte only and go on with the next byte
        $out .= self::to_utf8_convert($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
6478
6479
  /**
   * Convert one non-UTF-8 byte into its UTF-8 representation.
   *
   * A byte that has an entry in the Windows-1252 lookup table is replaced by that entry.
   * Every other byte is encoded as a two-byte UTF-8 sequence: the lead byte carries the
   * upper two bits, the continuation byte the lower six bits.
   *
   * @param string $int <p>The byte to convert. Despite the parameter name, the callers in this
   *                    class pass a one-byte string (e.g. <code>$str[$i]</code>).</p>
   *
   * @return string <p>The UTF-8 encoded replacement.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables on first use
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Shift instead of "/ 64": the division produces a float for every value that is not
    // a multiple of 64, and using a fractional float as an array key is deprecated.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6512
6513
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but checking for
   * that costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. When omitted (INF) or falsy,
   *                      Unicode separators (\pZ) and "other" characters (\pC) are stripped.
   *                      Note that the string "0" is falsy and therefore also selects this
   *                      default behaviour.</p>
   *
   * @return string <p>The trimmed string. If the default pattern cannot be applied (PCRE error,
   *                e.g. malformed UTF-8 input), the input is returned unchanged.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      $trimmed = \preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);

      // preg_replace() returns null on failure; returning that would violate the
      // declared string return type, so fall back to the untouched input.
      return $trimmed ?? $str;
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6539
6540
  /**
   * Upper-case the first character of a string and leave the rest untouched.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>The charset used for the substring and case operations.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string first.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character; substr() may signal "nothing left" with false
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $tail;
  }
6570
6571
  /**
   * Alias of "UTF8::ucfirst()": upper-cases the first character of a single word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The word to process.</p>
   * @param string $encoding  [optional] <p>The charset, forwarded unchanged.</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded unchanged.</p>
   *
   * @return string <p>The result of "UTF8::ucfirst()".</p>
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
6586
6587
  /**
   * Upper-case the first character of every word in the string.
   *
   * Unlike mb_convert_case($str, MB_CASE_TITLE), the remaining letters of each word are
   * left as they are (MB_CASE_TITLE would lower-case them).
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must not be changed.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // explicit comparison: the string "0" is a valid input and must not be treated as empty
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // The native function is only equivalent when neither a charlist nor any non-empty
    // exception was given. Compared against '' so that a "0" charlist/exception counts.
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if ($usePhpDefaultFunctions && self::is_ascii($str) === true) {
      return \ucwords($str);
    }

    $newWords = [];

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty fragments only; a "0" fragment is real content and must be kept
      if ((string)$word === '') {
        continue;
      }

      // with an empty exception list in_array() is always false, so every word is converted
      if (!\in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
6655
6656
  /**
   * Decode url-encoded input and html entities (repeatedly, if requested) and fix
   * urlencoded win1252 chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a pass no longer changes the string.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html entities so the entity decoder handles them
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapeRx, $str)) {
      $str = (string)\preg_replace($unicodeEscapeRx, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
6704
6705
  /**
   * Return an array that maps "urlencoded" win1252 sequences ("%20" ... "%FF") to UTF-8 strings.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Percent-escape (upper-case hex) as key, decoded UTF-8 string as value.</p>
   */
  public static function urldecode_fix_win1252_chars(): array
  {
    $table = [];

    // printable ASCII (%20 - %7E) decodes to the character itself
    for ($code = 0x20; $code <= 0x7E; ++$code) {
      $table['%' . \strtoupper(\dechex($code))] = \chr($code);
    }

    // NOTE(review): the entries mapped to '' below are empty in the reviewed text of this
    // file; confirm against the repository that no invisible characters (DEL, C1 controls,
    // no-break space, soft hyphen) were lost before relying on them.
    return $table + [
        '%7F' => '',
        '%80' => '€', // Windows-1252 0x80 is the euro sign (was mapped to a backtick)
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    ];
  }
6941
6942
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Two-byte sequences whose code point is below 256 become the matching single byte.
   * Every other multi-byte sequence (and a two-byte sequence that is cut off at the end
   * of the input) is replaced by "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars [optional] <p>If true, the string as it was before the byte-wise
   *                              decoding is returned whenever the decoded result is not shorter
   *                              (measured with "UTF8::strlen()") than that string.</p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the search list is the key set of the win1252 table and the replacement
    // list its values; confirm key type and direction against the data file.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position; the string is rewritten in place
    // (the output is never longer than the input) and cut to $j bytes afterwards.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // two-byte sequence: a lead byte without its continuation byte (input cut off)
          // must not read past the end of the string
          if ($i + 1 >= $len) {
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // four-byte sequence: skip one extra byte, then share the three-byte handling
          ++$i;
          // no break - intentional fall-through
        case "\xE0":
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    $return = (string)self::substr($str, 0, $j, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7023
7024
  /**
   * Encodes an ISO-8859-1 string to UTF-8 and afterwards applies the win1252 replacement table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the encoder reported a failure.</p>
   */
  public static function utf8_encode(string $str): string
  {
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    if (!isset($str[0])) {
      return '';
    }

    // NOTE(review): the native "\utf8_encode()" is deprecated as of PHP 8.2.
    $encoded = \utf8_encode($str);

    // the polyfill may return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte there is nothing for the replacement table to work on
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7064
7065
  /**
   * Fix utf8-win1252 chars; plain forwarder to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7078
7079
  /**
   * Returns the class-internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7095
7096
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Appended in place of the stripped remainder.</p>
   *
   * @return string <p>The input itself if it does not exceed the limit, otherwise the
   *                right-trimmed leading words followed by $strAddOn.</p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // optional leading whitespace, then up to $limit runs of "word + trailing whitespace"
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    $leadingWords = $matches[0] ?? null;

    // nothing matched, or the match already covers the whole string: nothing to cut
    if ($leadingWords === null || self::strlen($str) === self::strlen($leadingWords)) {
      return $str;
    }

    return self::rtrim($leadingWords) . $strAddOn;
  }
7127
7128
  /**
   * Wraps a string to a given number of characters.
   *
   * The input is translated into a single-byte mask ("#" per existing break, " " per space,
   * "?" per other character), the native wordwrap() runs on that mask, and the break
   * positions found in the wrapped mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    $mask = '';
    $chars = [];
    $segments = \explode($break, $str);
    $segmentCount = \count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $segmentCount; ++$i) {

      // every segment after the first one was preceded by an existing break
      if ($i) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$i];
      unset($segments[$i]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $wrapped = '';
    $j = 0;          // index of the next unconsumed entry in $chars
    $i = -1;         // current position inside the wrapped mask
    $breakPos = -1;  // position of the most recently found "#" in the wrapped mask
    $mask = \wordwrap($mask, $width, '#', $cut);

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {

      // copy the characters in front of the break
      for (++$i; $i < $breakPos; ++$i) {
        $wrapped .= $chars[$j];
        unset($chars[$j++]);
      }

      // drop the original break or space that the new break replaces
      if ($chars[$j] === $break || $chars[$j] === ' ') {
        unset($chars[$j++]);
      }

      $wrapped .= $break;
    }

    return $wrapped . \implode('', $chars);
  }
7192
7193
  /**
   * Returns the class-internal array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7202
7203
}
7204