Completed
Push — master ( a1c48e...664013 )
by Lars
03:16
created

UTF8::is_json()   B

Complexity

Conditions 6
Paths 9

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 6.2163

Importance

Changes 0
Metric Value
dl 0
loc 25
ccs 9
cts 11
cp 0.8182
rs 8.8977
c 0
b 0
f 0
cc 6
nc 9
nop 1
crap 6.2163
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
17
18
  /**
   * Bom => Byte-Length
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * Every raw BOM is listed together with the character sequence it turns into
   * when its bytes are read as "WINDOWS-1252" (see the per-line comments).
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
      // 0xEF 0xBB 0xBF read as "WINDOWS-1252" are the three chars below (2 bytes each in UTF-8);
      // an empty-string key here would be a prefix of every string
      'ï»¿'              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'ſ'            => 's',
157
      "\xCD\x85"     => 'ι',
158
      'ς'            => 'σ',
159
      "\xCF\x90"     => 'β',
160
      "\xCF\x91"     => 'θ',
161
      "\xCF\x95"     => 'φ',
162
      "\xCF\x96"     => 'π',
163
      "\xCF\xB0"     => 'κ',
164
      "\xCF\xB1"     => 'ρ',
165
      "\xCF\xB5"     => 'ε',
166
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
167
      "\xE1\xBE\xBE" => 'ι',
168
  ];
169
170
  /**
171
   * @var array
172
   */
173
  private static $SUPPORT = [];
174
175
  /**
176
   * @var null|array
177
   */
178
  private static $UTF8_MSWORD;
179
180
  /**
181
   * @var null|array
182
   */
183
  private static $BROKEN_UTF8_FIX;
184
185
  /**
186
   * @var null|array
187
   */
188
  private static $WIN1252_TO_UTF8;
189
190
  /**
191
   * @var null|array
192
   */
193
  private static $ENCODINGS;
194
195
  /**
196
   * @var null|array
197
   */
198
  private static $ORD;
199
200
  /**
201
   * @var null|array
202
   */
203
  private static $CHR;
204
205
  /**
   * Instantiating the class runs the environment support-check
   * (see UTF8::checkForSupport()).
   */
  public function __construct()
  {
    self::checkForSupport();
  }
212
213
  /**
   * Multi-byte aware replacement for the "$str[1]" syntax: fetch one character by position.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string if $str is empty or $pos is negative.</p>
   */
  public static function access(string $str, int $pos): string
  {
    // an empty input or a negative position can never yield a character
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
233
234
  /**
   * Returns the string with the UTF-8 BOM (see UTF8::bom()) in front of it.
   *
   * INFO: If UTF8::string_has_bom() already reports a BOM, the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string(string $str): string
  {
    // a BOM was detected: nothing to add
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
251
252
  /**
   * Builds a copy of the array whose keys went through UTF8::strtolower() or UTF8::strtoupper().
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value is handled
   *                     like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array <p>The original values, stored under the lower- or uppercased keys.</p>
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // only an explicit CASE_UPPER switches to uppercase, everything else means lowercase
    $useUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $oldKey => $item) {
      $newKey = $useUpper ? self::strtoupper($oldKey) : self::strtolower($oldKey);
      $converted[$newKey] = $item;
    }

    return $converted;
  }
284
285
  /**
   * Returns the substring between $start and $end, if found, or an empty
   * string. An optional offset may be supplied from which to begin the
   * search for the start string.
   *
   * An empty string is also returned when $end directly follows $start.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $posStart = self::strpos($str, $start, $offset, $encoding);
    if ($posStart === false) {
      return '';
    }

    // first character after the start delimiter
    $substrIndex = $posStart + self::strlen($start, $encoding);
    $posEnd = self::strpos($str, $end, $substrIndex, $encoding);
    if (
        $posEnd === false
        ||
        $posEnd === $substrIndex
    ) {
      return '';
    }

    // self::substr() can yield false; cast (as access() does) so the declared
    // string return type holds under strict_types
    return (string)self::substr($str, $substrIndex, $posEnd - $substrIndex, $encoding);
  }
317
318
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zeros carry no value
   * and are dropped, then the remaining bits are converted 8 at a time. Working
   * on the digits directly (instead of \base_convert()) keeps long inputs exact
   * and keeps the bytes aligned when the bit count is not a multiple of 8.
   *
   * @param mixed $bin <p>String consisting of "1" and "0"; non-string or empty input yields ''.</p>
   *
   * @return string <p>The decoded bytes, '' if the input has no set bit.</p>
   */
  public static function binary_to_str($bin): string
  {
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // \base_convert() silently skipped characters that are not valid digits; keep doing so
    $bits = (string)\preg_replace('/[^01]/', '', $bin);

    // a value of zero (or no digits at all) maps to an empty string
    $bits = \ltrim($bits, '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to whole bytes so the first (partial) byte keeps its numeric value
    $missing = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
338
339
  /**
   * Provides the three bytes of the UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
350
351
  /**
   * @alias of UTF8::chr_map()
   *
   * Forwards both arguments unchanged to UTF8::chr_map().
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
365
366
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * @param string $str
   * @param int    $index <p>Position of the character.</p>
   *
   * @return string <p>The character at $index.</p>
   */
  public static function char_at(string $str, int $index): string
  {
    // self::substr() can yield false; cast (as access() does) so the declared
    // string return type holds under strict_types
    return (string)self::substr($str, $index, 1);
  }
378
379
  /**
   * Splits the string via UTF8::str_split() into pieces of length 1.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] <p>An array of chars.</p>
   */
  public static function chars(string $str): array
  {
    $pieces = self::str_split($str, 1);

    return $pieces;
  }
390
391
  /**
   * Probes the environment once and records the findings in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the probe already ran: keep the recorded results
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $intlLoaded = self::intl_loaded();
    self::$SUPPORT['intl'] = $intlLoaded;

    // the transliterator id list stays empty unless intl and the function are both available
    $transliteratorIds = [];
    if ($intlLoaded === true && \function_exists('transliterator_list_ids') === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $transliteratorIds = \transliterator_list_ids();
    }
    self::$SUPPORT['intl__transliterator_list_ids'] = $transliteratorIds;

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
437
438
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized in a function-static cache keyed by code point and encoding.
   * Code points up to 127 come from the lookup table loaded via self::getData('chr');
   * larger ones are produced by \IntlChar::chr() when available, otherwise the UTF-8
   * bytes are assembled by hand from the same table.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // integers outside of the Unicode range have no character: report the documented
    // failure value instead of reading undefined offsets of the lookup table below
    if (
        \is_int($code_point) === true
        &&
        ($code_point < 0 || $code_point > 0x10FFFF)
    ) {
      return null;
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);

      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
      }

      return $CHAR_CACHE[$cacheKey] = $chr;
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    // manual UTF-8 encoding; $code_point is > 0x7F here, the 1-byte case returned above
    if ($code_point <= 0x7FF) {
      // 110xxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $chr = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
530
531
  /**
   * Runs the callback on every piece returned by UTF8::split() for the string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
545
546
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] <p>An array of byte lengths of each character.</p>
   */
  public static function chr_size_list(string $str): array
  {
    if (!isset($str[0])) {
      return [];
    }

    // self::$SUPPORT is read below, so make sure it was populated
    // (same guard as in chr() and encode())
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $strSplit = self::split($str);

    // with mbstring function overloading the byte count is taken via
    // UTF8::strlen() with the 8-bit "CP850" charset instead of plain \strlen()
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      return \array_map(
          static function ($data) {
            return UTF8::strlen($data, 'CP850'); // 8-BIT
          },
          $strSplit
      );
    }

    return \array_map('\strlen', $strSplit);
  }
577
578
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are read (1 to 4); the payload bits
   * of the lead byte and of each following byte are then joined, 6 bits per
   * following byte.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty input.</p>
   */
  public static function chr_to_decimal(string $char): int
  {
    // no byte to inspect: avoid reading the uninitialized offset 0
    // NOTE(review): assumes 0 is the wanted result for '' (chr_to_hex() maps "&#0;" to '') - confirm
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence contributes an empty payload instead of
      // reading past the end of the string
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
616
617
  /**
   * Formats the result of UTF8::ord() for a character via UTF8::int_to_hex().
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional]
   *
   * @return string <p>The code point encoded as U+xxxx, '' for an empty input.<p>
   */
  public static function chr_to_hex(string $char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // the "&#0;" entity is looked up as an empty string
    $lookup = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($lookup);

    return self::int_to_hex($codePoint, $pfix);
  }
637
638
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * The argument is handed over as-is.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
651
652
  /**
   * Cuts the string into chunks via UTF8::split() and glues them together with $end.
   *
   * INFO: $end is placed between the chunks only (implode), not after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
665
666
  /**
   * Strips every byte that is not part of a well-formed UTF-8 sequence and then
   * applies the optional clean-up steps selected by the flags.
   *
   * The optional steps run in this fixed order: diamond question mark, invisible
   * characters, whitespace, MS Word chars, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…"
   *                                              => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with
   *                                              $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean(string $str, bool $remove_bom = false, bool $normalize_whitespace = false, bool $normalize_msword = false, bool $keep_non_breaking_space = false, bool $replace_diamond_question_mark = false, bool $remove_invisible_characters = true): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, the other two groups catch stray bytes
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // replacing each match by group 1 keeps the valid runs and drops the stray bytes
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
726
727
  /**
   * Repairs the string with UTF8::fix_simple_utf8() and then sanitizes it with UTF8::clean().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, '' for an empty input.</p>
   */
  public static function cleanup(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // UTF8::clean() always removes all none UTF-8 symbols, additionally we ask it to:
    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (no MS Word normalization)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
760
761
  /**
   * Maps a string (split via UTF8::split()) or an array of strings through UTF8::ord().
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, every code point is additionally passed through
   *                                    UTF8::int_to_hex() (U+xxxx format), by default
   *                                    code points will be returned as integers.</p>
   *
   * @return int[]|string[] <p>The array of code points.</p>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    // a plain string is broken up first, an array is used as it is
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $chars);

    if (!$u_style) {
      return $points;
    }

    return \array_map([self::class, 'int_to_hex'], $points);
  }
798
799
  /**
   * Replaces every run matching "[[:space:]]+" with a single space (via
   * UTF8::regexReplace()) and passes the result through UTF8::trim().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String with a trimmed $str and condensed whitespace.</p>
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regexReplace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
814
815
  /**
   * Counts how often each piece returned by UTF8::split() (length 1) occurs.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int[] <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
828
829
  /**
   * Shortcut for UTF8::delimit() with "-" as the delimiter.
   *
   * @see UTF8::delimit()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function dasherize(string $str): string
  {
    $dashed = self::delimit($str, '-');

    return $dashed;
  }
842
843
  /**
   * Builds the numeric HTML entity "&#<int>;" and decodes it with UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
854
855
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * INFO: only the uppercase characters "A-Z" are recognized as word starts.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string; inserted literally.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark every inner uppercase char with a dash ...
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // ... and swap each run of dashes / underscores / whitespace for the delimiter;
    // the callback keeps sequences like "$1" or "\1" inside $delimiter from being
    // expanded as back-references
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function () use ($delimiter): string {
          return $delimiter;
        },
        $str
    );
  }
877
878
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * "UTF-8" and "ISO-8859-1" targets are delegated to UTF8::to_utf8() / UTF8::to_iso8859(),
   * everything else goes through \mb_convert_encoding().
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the input if nothing had to be / could be converted.</p>
   */
  public static function encode(string $encoding, string $str, bool $force = true): string
  {
    // nothing to do without a string or without a target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // DEBUG
    //var_dump($encoding, $detected, $str, "\n\n");

    // not forced: leave the string alone if the detection failed or it already matches the target
    if (!$force && ($detected === false || $detected === $encoding)) {
      return $str;
    }

    // encodings the dedicated helpers below deal with
    $handledNatively = ['UTF-8', 'WINDOWS-1252', 'ISO-8859-1'];
    $sourceIsHandled = $force || \in_array($detected, $handledNatively, true);

    if ($encoding === 'UTF-8' && $sourceIsHandled) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $sourceIsHandled) {
      return self::to_iso8859($str);
    }

    if (
        !\in_array($encoding, $handledNatively, true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // NOTE(review): when forced, source and target encoding are the same value - confirm this is intended
    $converted = \mb_convert_encoding(
        $str,
        $encoding,
        ($force ? $encoding : $detected)
    );

    // an empty / falsy conversion result falls back to the input string
    return $converted ? $converted : $str;
  }
971
972
  /**
973
   * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
974
   *
975
   * @param string   $str                    <p>The input string.</p>
976
   * @param string   $search                 <p>The searched string.</p>
977
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
978
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
979
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "\mb_" function</p>
980
   *
981
   * @return string
982
   */
983
  public static function extractText(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
984
  {
985
    // init
986
    $text = $str;
987
988
    if (empty($text)) {
989
      return '';
990
    }
991
992
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
993
994
    if ($length === null) {
995
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
996
    }
997
998
    if (empty($search)) {
999
1000
      $stringLength = self::strlen($text, $encoding);
1001
1002
      if ($length > 0) {
1003
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
1004
      } else {
1005
        $end = 0;
1006
      }
1007
1008
      $pos = \min(
1009
          self::strpos($text, ' ', $end, $encoding),
1010
          self::strpos($text, '.', $end, $encoding)
1011
      );
1012
1013
      if ($pos) {
1014
        return \rtrim(
1015
                   self::substr($text, 0, $pos, $encoding),
1016
                   $trimChars
1017
               ) . $replacerForSkippedText;
1018
      }
1019
1020
      return $text;
1021
    }
1022
1023
    $wordPos = self::stripos(
1024
        $text,
1025
        $search,
1026
        0,
1027
        $encoding
1028
    );
1029
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);
1030
1031
    if ($halfSide > 0) {
1032
1033
      $halfText = self::substr($text, 0, $halfSide, $encoding);
1034
      $pos_start = \max(
1035
          self::strrpos($halfText, ' ', 0, $encoding),
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1033 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1036
          self::strrpos($halfText, '.', 0, $encoding)
0 ignored issues
show
Security Bug introduced by
It seems like $halfText defined by self::substr($text, 0, $halfSide, $encoding) on line 1033 can also be of type false; however, voku\helper\UTF8::strrpos() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
1037
      );
1038
1039
      if (!$pos_start) {
1040
        $pos_start = 0;
1041
      }
1042
1043
    } else {
1044
      $pos_start = 0;
1045
    }
1046
1047
    if ($wordPos && $halfSide > 0) {
1048
      $l = $pos_start + $length - 1;
1049
      $realLength = self::strlen($text, $encoding);
1050
1051
      if ($l > $realLength) {
1052
        $l = $realLength;
1053
      }
1054
1055
      $pos_end = \min(
1056
                     self::strpos($text, ' ', $l, $encoding),
1057
                     self::strpos($text, '.', $l, $encoding)
1058
                 ) - $pos_start;
1059
1060
      if (!$pos_end || $pos_end <= 0) {
1061
        $extract = $replacerForSkippedText . \ltrim(
1062
                self::substr(
1063
                    $text,
1064
                    $pos_start,
1065
                    self::strlen($text),
1066
                    $encoding
1067
                ),
1068
                $trimChars
1069
            );
1070 View Code Duplication
      } else {
1071
        $extract = $replacerForSkippedText . \trim(
1072
                self::substr(
1073
                    $text,
1074
                    $pos_start,
1075
                    $pos_end,
1076
                    $encoding
1077
                ),
1078
                $trimChars
1079
            ) . $replacerForSkippedText;
1080
      }
1081
1082
    } else {
1083
1084
      $l = $length - 1;
1085
      $trueLength = self::strlen($text, $encoding);
1086
1087
      if ($l > $trueLength) {
1088
        $l = $trueLength;
1089
      }
1090
1091
      $pos_end = \min(
1092
          self::strpos($text, ' ', $l, $encoding),
1093
          self::strpos($text, '.', $l, $encoding)
1094
      );
1095
1096 View Code Duplication
      if ($pos_end) {
1097
        $extract = \rtrim(
1098
                       self::substr($text, 0, $pos_end, $encoding),
1099
                       $trimChars
1100
                   ) . $replacerForSkippedText;
1101
      } else {
1102
        $extract = $text;
1103
      }
1104
    }
1105
1106
    return $extract;
1107
  }
1108
1109
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read. The value is passed through
   *                                        filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Forwarded as second argument to PHP's file_get_contents().
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If null and $timeout is non-zero,
   *                                        a context with an "http" timeout is created internally.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts (null is treated as 0).
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>
   *                                        The time in seconds for the timeout; only used when no
   *                                        $context was given.
   *                                        </p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents(string $filename, bool $use_include_path = false, $context = null, int $offset = null, int $maxLength = null, int $timeout = 10, bool $convertToUtf8 = true)
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() signals a failed filter with false; honor the documented
    // "false on failure" contract instead of passing a bool on as file name.
    if ($filename === false) {
      return false;
    }

    // Only build a default context when the caller did not supply one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // The length argument is only passed when the caller provided one.
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1192
1193
  /**
   * Tells whether a character exists at the given index of the string.
   *
   * A negative index counts backwards from the last character. Implements
   * part of the ArrayAccess interface.
   *
   * @param int    $offset   <p>The index to check.</p>
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return boolean <p>Whether or not the index exists.</p>
   */
  public static function offset_exists($offset, string $str, string $encoding = 'UTF-8'): bool
  {
    $index = (int)$offset;
    $charCount = self::strlen($str, $encoding);

    // Negative indexes are valid as long as their magnitude fits into the string.
    return $index >= 0
        ? $charCount > $index
        : $charCount >= \abs($index);
  }
1217
1218
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    // An unreadable file yields false (PHP already emits a warning for it);
    // it cannot have a BOM, and false must not be handed on as if it were content.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1229
1230
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively, element by element /
   * property by property. Values that are neither array, object nor string
   * are returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to the "Normalizer" class (default: 4, NFC).</p>
   * @param string $leading_combining  <p>Prefix added when the result starts with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== \strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = \str_replace(["\r\n", "\r"], "\n", $var);
    }

    // Nothing more to do unless the ASCII check explicitly fails.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string was already in the requested form.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Fall back to a forced re-encoding when normalization produced nothing.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
        && isset($normalized[0], $leading_combining[0])
        && \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1293
1294
  /**
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it;
   * the result is then passed through self::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>Name of a variable to get.</p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when it was explicitly passed.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really supplied a fourth argument.
    $value = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1334
1335
  /**
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them; the result is
   * then passed through self::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options ("filter", "flags" and
   *                          "options" keys).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only $type given, let PHP apply its own defaults.
    $values = \func_num_args() < 2
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1382
1383
  /**
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Filters a variable with a specified filter; the result is then passed
   * through self::filter().
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>Value to filter.</p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, callable type should be passed. The
   *                        callback must accept one argument, the value to be filtered, and return
   *                        the value after filtering/sanitizing it.
   *                        Only forwarded when it was explicitly passed.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // for filters that accept options, use this format
   *                        $options = array(
   *                        'options' => array(
   *                        'default' => 3, // value to return if the filter fails
   *                        'min_range' => 0
   *                        ),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // for filter that only accept flags, you can pass them directly
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // callback filter
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really supplied a third argument.
    $filtered = \func_num_args() < 3
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1452
1453
  /**
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets multiple variables and optionally filters them; the result is
   * then passed through self::filter().
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter,
   *                          flags and options ("filter", "flags" and "options" keys).
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set.
   * @since 5.2.0
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, let PHP apply its own defaults.
    $values = \func_num_args() < 2
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1497
1498
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the start;
   *                         values <= 0 yield an empty string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // substr() may report a failure as false; the cast keeps the declared
    // string return type intact under strict types.
    return (string)self::substr($str, 0, $n, $encoding);
  }
1515
1516
  /**
   * Checks that the string does not contain more characters than allowed.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The size in number of chars to be checked against string.
   *
   * @return bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1528
1529
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The fix is a plain search/replace based on the "utf8_fix" data table.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search/replace lists are derived once per request and then reused.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {

      // Lazy-load the mapping table on first use.
      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
  }
1563
1564
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are handled recursively, element by element. A string is decoded
   * and re-encoded repeatedly until a pass no longer changes it.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      $fixed = [];
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Repeat the decode/encode round trip until the value is stable.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1592
1593
  /**
   * Get the text direction of a specific character.
   *
   * If the support-info reports "intlChar" as available, the direction class
   * returned by \IntlChar::charDirection() decides. When that class is in
   * neither of the two lists below (or IntlChar is not available), the code
   * point of the character is looked up in a built-in table of
   * right-to-left ranges.
   *
   * @param string $char <p>The character to inspect.</p>
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $direction = \IntlChar::charDirection($char);

      // numeric direction classes, taken from the "IntlChar"-Class
      if (\in_array($direction, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($direction, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }
    }

    // NOTE(review): assumes chr_to_decimal() returns an int code point — confirm
    $c = self::chr_to_decimal($char);

    // everything outside of this window is treated as left-to-right
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    // inclusive [first, last] code point ranges that are right-to-left,
    // sorted ascending so that the lookup can stop early
    static $RTL_RANGES = [
        [0x5be, 0x5be],
        [0x5c0, 0x5c0],
        [0x5c3, 0x5c3],
        [0x5c6, 0x5c6],
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x608, 0x608],
        [0x60b, 0x60b],
        [0x60d, 0x60d],
        [0x61b, 0x61b],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x710, 0x710],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7b1, 0x7b1],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x7fa, 0x7fa],
        [0x800, 0x815],
        [0x81a, 0x81a],
        [0x824, 0x824],
        [0x828, 0x828],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0x85e, 0x85e],
        [0x200f, 0x200f],
        [0xfb1d, 0xfb1d],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb3e, 0xfb3e],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x10808, 0x10808],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083c, 0x1083c],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x1093f, 0x1093f],
        [0x10a00, 0x10a00],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($RTL_RANGES as $range) {
      if ($c < $range[0]) {
        // the table is sorted: no later range can contain $c
        break;
      }

      if ($c <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1715
1716
  /**
   * Load a data file from the "data" directory next to this class.
   *
   * The file "data/<$file>.php" is included and whatever it returns is
   * handed back to the caller.
   *
   * @param string $file <p>File name without the directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file,
   *                               or false if the file does not exist.</p>
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1733
1734
  /**
   * Report which php-features were detected.
   *
   * The detection is run first if it has not happened yet.
   *
   * @param string|null $key <p>Name of a single support-flag, or null for all of them.</p>
   *
   * @return mixed <p>The full support-"array" if $key === null<br>
   *               the stored value if $key is set and available<br>
   *               otherwise null</p>
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    return $key === null ? self::$SUPPORT : (self::$SUPPORT[$key] ?? null);
  }
1759
1760
  /**
   * Build a random string by picking characters from a pool.
   *
   * Returns an empty string when the pool is empty; a $length below 1 also
   * yields an empty string because no character is picked.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);
    if ($poolSize === 0) {
      return '';
    }

    $result = '';
    for ($picked = 0; $picked < $length; ++$picked) {
      try {
        $index = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        // fall back to the non-cryptographic generator
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
1793
1794
  /**
   * Build a (practically) unique identifier string.
   *
   * The identifier is derived from a random number, the session id, the
   * remote and server address (when present in $_SERVER), the extra entropy
   * and \uniqid(). It is meant as an identifier, not as a secret.
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // prefer the CSPRNG, like get_random_string() does; same value range as \mt_rand()
    try {
      $randInt = \random_int(0, \mt_getrandmax());
    } catch (\Exception $e) {
      /** @noinspection RandomApiMigrationInspection */
      $randInt = \mt_rand();
    }

    $uniqueHelper = $randInt .
                    \session_id() .
                    ($_SERVER['REMOTE_ADDR'] ?? '') .
                    ($_SERVER['SERVER_ADDR'] ?? '') .
                    $entropyExtra;

    $uniqueString = \uniqid($uniqueHelper, true);

    if ($md5) {
      $uniqueString = \md5($uniqueString . $uniqueHelper);
    }

    return $uniqueString;
  }
1816
1817
  /**
   * Alias for "UTF8::string_has_bom()": forwards the string unchanged and
   * returns that method's result.
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1832
1833
  /**
   * Check whether the string contains at least one lower case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains a lower case character.</p>
   */
  public static function has_lowercase(string $str): bool
  {
    // anything, followed by one character of the POSIX "lower" class
    $pattern = '.*[[:lower:]]';

    return self::matchesPattern($str, $pattern);
  }
1844
1845
  /**
   * Check whether the string contains at least one upper case character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not the string contains an upper case character.</p>
   */
  public static function has_uppercase(string $str): bool
  {
    // anything, followed by one character of the POSIX "upper" class
    $pattern = '.*[[:upper:]]';

    return self::matchesPattern($str, $pattern);
  }
1856
1857
  /**
   * Convert a hexadecimal value into a UTF-8 character.
   *
   * The value is turned into a number via \hexdec() and then handed to
   * UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1868
1869
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted input: 4 to 6 hexadecimal digits (case-insensitive), optionally
   * prefixed with "\u" or "U+". Anything else is rejected.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int(string $hexDec)
  {
    if ($hexDec === '') {
      return false;
    }

    // only real hex digits are allowed: "[a-z0-9]" would let e.g. "zzzz"
    // through, which \intval() then silently turns into 0
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
1890
1891
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded
   * unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Flags as accepted by UTF8::html_entity_decode().</p>
   * @param string $encoding [optional] <p>The charset to use.</p>
   *
   * @return string
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1906
1907
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * Uses \mb_encode_numericentity() when that function exists, otherwise
   * every character is encoded on its own via UTF8::single_chr_html_encode().
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (\function_exists('mb_encode_numericentity')) {
      // with $keepAsciiChars only code points from 0x80 upwards are converted
      $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

      // one convmap entry is exactly [start, end, offset, mask]; PHP >= 8.0
      // throws a ValueError if the element count is not a multiple of four
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    return \implode(
        '',
        \array_map(
            static function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1953
1954
  /**
1955
   * UTF-8 version of html_entity_decode()
1956
   *
1957
   * The reason we are not using html_entity_decode() by itself is because
1958
   * while it is not technically correct to leave out the semicolon
1959
   * at the end of an entity most browsers will still interpret the entity
1960
   * correctly. html_entity_decode() does not convert entities without
1961
   * semicolons, so we are left with our own little solution here. Bummer.
1962
   *
1963
   * Convert all HTML entities to their applicable characters
1964
   *
1965
   * INFO: opposite to UTF8::html_encode()
1966
   *
1967
   * @link http://php.net/manual/en/function.html-entity-decode.php
1968
   *
1969
   * @param string $str      <p>
1970
   *                         The input string.
1971
   *                         </p>
1972
   * @param int    $flags    [optional] <p>
1973
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1974
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1975
   *                         <table>
1976
   *                         Available <i>flags</i> constants
1977
   *                         <tr valign="top">
1978
   *                         <td>Constant Name</td>
1979
   *                         <td>Description</td>
1980
   *                         </tr>
1981
   *                         <tr valign="top">
1982
   *                         <td><b>ENT_COMPAT</b></td>
1983
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1984
   *                         </tr>
1985
   *                         <tr valign="top">
1986
   *                         <td><b>ENT_QUOTES</b></td>
1987
   *                         <td>Will convert both double and single quotes.</td>
1988
   *                         </tr>
1989
   *                         <tr valign="top">
1990
   *                         <td><b>ENT_NOQUOTES</b></td>
1991
   *                         <td>Will leave both double and single quotes unconverted.</td>
1992
   *                         </tr>
1993
   *                         <tr valign="top">
1994
   *                         <td><b>ENT_HTML401</b></td>
1995
   *                         <td>
1996
   *                         Handle code as HTML 4.01.
1997
   *                         </td>
1998
   *                         </tr>
1999
   *                         <tr valign="top">
2000
   *                         <td><b>ENT_XML1</b></td>
2001
   *                         <td>
2002
   *                         Handle code as XML 1.
2003
   *                         </td>
2004
   *                         </tr>
2005
   *                         <tr valign="top">
2006
   *                         <td><b>ENT_XHTML</b></td>
2007
   *                         <td>
2008
   *                         Handle code as XHTML.
2009
   *                         </td>
2010
   *                         </tr>
2011
   *                         <tr valign="top">
2012
   *                         <td><b>ENT_HTML5</b></td>
2013
   *                         <td>
2014
   *                         Handle code as HTML 5.
2015
   *                         </td>
2016
   *                         </tr>
2017
   *                         </table>
2018
   *                         </p>
2019
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
2020
   *
2021
   * @return string <p>The decoded string.</p>
2022
   */
2023 17
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // strings with fewer than four bytes are returned untouched
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // without "&", or with neither "&#" nor ";", there is nothing to decode
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // the support-flags are read below, so make sure they were detected
    // (same guard as in the other methods that read self::$SUPPORT)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // repeat until a full pass changes nothing, so nested (double encoded)
    // entities are resolved as well
    do {
      $str_compare = $str;

      $str = (string)\preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            // quotes are left encoded in this step; whether they get decoded
            // is decided by $flags in \html_entity_decode() below
            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the regex appends the missing ";" to numeric entities first; the
      // cast guards against a null result when the regex engine fails)
      $str = \html_entity_decode(
          (string)\preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2093
2094
  /**
2095
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2096
   *
2097
   * @link http://php.net/manual/en/function.htmlentities.php
2098
   *
2099
   * @param string $str           <p>
2100
   *                              The input string.
2101
   *                              </p>
2102
   * @param int    $flags         [optional] <p>
2103
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2104
   *                              invalid code unit sequences and the used document type. The default is
2105
   *                              ENT_COMPAT | ENT_HTML401.
2106
   *                              <table>
2107
   *                              Available <i>flags</i> constants
2108
   *                              <tr valign="top">
2109
   *                              <td>Constant Name</td>
2110
   *                              <td>Description</td>
2111
   *                              </tr>
2112
   *                              <tr valign="top">
2113
   *                              <td><b>ENT_COMPAT</b></td>
2114
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2115
   *                              </tr>
2116
   *                              <tr valign="top">
2117
   *                              <td><b>ENT_QUOTES</b></td>
2118
   *                              <td>Will convert both double and single quotes.</td>
2119
   *                              </tr>
2120
   *                              <tr valign="top">
2121
   *                              <td><b>ENT_NOQUOTES</b></td>
2122
   *                              <td>Will leave both double and single quotes unconverted.</td>
2123
   *                              </tr>
2124
   *                              <tr valign="top">
2125
   *                              <td><b>ENT_IGNORE</b></td>
2126
   *                              <td>
2127
   *                              Silently discard invalid code unit sequences instead of returning
2128
   *                              an empty string. Using this flag is discouraged as it
2129
   *                              may have security implications.
2130
   *                              </td>
2131
   *                              </tr>
2132
   *                              <tr valign="top">
2133
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2134
   *                              <td>
2135
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2136
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2137
   *                              </td>
2138
   *                              </tr>
2139
   *                              <tr valign="top">
2140
   *                              <td><b>ENT_DISALLOWED</b></td>
2141
   *                              <td>
2142
   *                              Replace invalid code points for the given document type with a
2143
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2144
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2145
   *                              instance, to ensure the well-formedness of XML documents with
2146
   *                              embedded external content.
2147
   *                              </td>
2148
   *                              </tr>
2149
   *                              <tr valign="top">
2150
   *                              <td><b>ENT_HTML401</b></td>
2151
   *                              <td>
2152
   *                              Handle code as HTML 4.01.
2153
   *                              </td>
2154
   *                              </tr>
2155
   *                              <tr valign="top">
2156
   *                              <td><b>ENT_XML1</b></td>
2157
   *                              <td>
2158
   *                              Handle code as XML 1.
2159
   *                              </td>
2160
   *                              </tr>
2161
   *                              <tr valign="top">
2162
   *                              <td><b>ENT_XHTML</b></td>
2163
   *                              <td>
2164
   *                              Handle code as XHTML.
2165
   *                              </td>
2166
   *                              </tr>
2167
   *                              <tr valign="top">
2168
   *                              <td><b>ENT_HTML5</b></td>
2169
   *                              <td>
2170
   *                              Handle code as HTML 5.
2171
   *                              </td>
2172
   *                              </tr>
2173
   *                              </table>
2174
   *                              </p>
2175
   * @param string $encoding      [optional] <p>
2176
   *                              Like <b>htmlspecialchars</b>,
2177
   *                              <b>htmlentities</b> takes an optional third argument
2178
   *                              <i>encoding</i> which defines encoding used in
2179
   *                              conversion.
2180
   *                              Although this argument is technically optional, you are highly
2181
   *                              encouraged to specify the correct value for your code.
2182
   *                              </p>
2183
   * @param bool   $double_encode [optional] <p>
2184
   *                              When <i>double_encode</i> is turned off PHP will not
2185
   *                              encode existing html entities. The default is to convert everything.
2186
   *                              </p>
2187
   *
2188
   *
2189
   * @return string the encoded string.
2190
   * </p>
2191
   * <p>
2192
   * If the input <i>string</i> contains an invalid code unit
2193
   * sequence within the given <i>encoding</i> an empty string
2194
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2195
   * <b>ENT_SUBSTITUTE</b> flags are set.
2196
   */
2197 2
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * PHP does not turn a backslash into an html entity, so it is replaced
     * here by its numeric entity after the native conversion.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    // last step: html_encode() with "keep ASCII chars" enabled
    return self::html_encode($encoded, true, $encoding);
  }
2217
2218
  /**
2219
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2220
   *
2221
   * INFO: Take a look at "UTF8::htmlentities()"
2222
   *
2223
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2224
   *
2225
   * @param string $str           <p>
2226
   *                              The string being converted.
2227
   *                              </p>
2228
   * @param int    $flags         [optional] <p>
2229
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2230
   *                              invalid code unit sequences and the used document type. The default is
2231
   *                              ENT_COMPAT | ENT_HTML401.
2232
   *                              <table>
2233
   *                              Available <i>flags</i> constants
2234
   *                              <tr valign="top">
2235
   *                              <td>Constant Name</td>
2236
   *                              <td>Description</td>
2237
   *                              </tr>
2238
   *                              <tr valign="top">
2239
   *                              <td><b>ENT_COMPAT</b></td>
2240
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2241
   *                              </tr>
2242
   *                              <tr valign="top">
2243
   *                              <td><b>ENT_QUOTES</b></td>
2244
   *                              <td>Will convert both double and single quotes.</td>
2245
   *                              </tr>
2246
   *                              <tr valign="top">
2247
   *                              <td><b>ENT_NOQUOTES</b></td>
2248
   *                              <td>Will leave both double and single quotes unconverted.</td>
2249
   *                              </tr>
2250
   *                              <tr valign="top">
2251
   *                              <td><b>ENT_IGNORE</b></td>
2252
   *                              <td>
2253
   *                              Silently discard invalid code unit sequences instead of returning
2254
   *                              an empty string. Using this flag is discouraged as it
2255
   *                              may have security implications.
2256
   *                              </td>
2257
   *                              </tr>
2258
   *                              <tr valign="top">
2259
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2260
   *                              <td>
2261
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2262
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2263
   *                              </td>
2264
   *                              </tr>
2265
   *                              <tr valign="top">
2266
   *                              <td><b>ENT_DISALLOWED</b></td>
2267
   *                              <td>
2268
   *                              Replace invalid code points for the given document type with a
2269
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2270
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2271
   *                              instance, to ensure the well-formedness of XML documents with
2272
   *                              embedded external content.
2273
   *                              </td>
2274
   *                              </tr>
2275
   *                              <tr valign="top">
2276
   *                              <td><b>ENT_HTML401</b></td>
2277
   *                              <td>
2278
   *                              Handle code as HTML 4.01.
2279
   *                              </td>
2280
   *                              </tr>
2281
   *                              <tr valign="top">
2282
   *                              <td><b>ENT_XML1</b></td>
2283
   *                              <td>
2284
   *                              Handle code as XML 1.
2285
   *                              </td>
2286
   *                              </tr>
2287
   *                              <tr valign="top">
2288
   *                              <td><b>ENT_XHTML</b></td>
2289
   *                              <td>
2290
   *                              Handle code as XHTML.
2291
   *                              </td>
2292
   *                              </tr>
2293
   *                              <tr valign="top">
2294
   *                              <td><b>ENT_HTML5</b></td>
2295
   *                              <td>
2296
   *                              Handle code as HTML 5.
2297
   *                              </td>
2298
   *                              </tr>
2299
   *                              </table>
2300
   *                              </p>
2301
   * @param string $encoding      [optional] <p>
2302
   *                              Defines encoding used in conversion.
2303
   *                              </p>
2304
   *                              <p>
2305
   *                              For the purposes of this function, the encodings
2306
   *                              ISO-8859-1, ISO-8859-15,
2307
   *                              UTF-8, cp866,
2308
   *                              cp1251, cp1252, and
2309
   *                              KOI8-R are effectively equivalent, provided the
2310
   *                              <i>string</i> itself is valid for the encoding, as
2311
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2312
   *                              the same positions in all of these encodings.
2313
   *                              </p>
2314
   * @param bool   $double_encode [optional] <p>
2315
   *                              When <i>double_encode</i> is turned off PHP will not
2316
   *                              encode existing html entities, the default is to convert everything.
2317
   *                              </p>
2318
   *
2319
   * @return string The converted string.
2320
   * </p>
2321
   * <p>
2322
   * If the input <i>string</i> contains an invalid code unit
2323
   * sequence within the given <i>encoding</i> an empty string
2324
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2325
   * <b>ENT_SUBSTITUTE</b> flags are set.
2326
   */
2327 1 View Code Duplication
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given, every other name is normalized first
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2335
2336
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if loaded, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a bool, so no ternary is needed.
    $loaded = \extension_loaded('iconv');

    return $loaded;
  }
2345
2346
  /**
   * Alias that forwards to "UTF8::decimal_to_chr()".
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Passed to decimal_to_chr() unchanged.</p>
   *
   * @return string <p>Whatever decimal_to_chr() returns.</p>
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2359
2360
  /**
   * Converts an integer to a prefixed hexadecimal code point string (e.g. 65 => "U+0041").
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Text put in front of the hex digits.</p>
   *
   * @return string <p>The prefix followed by the dechex() digits, zero-padded on the left to at least four
   *                digits.</p>
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // Values shorter than four hex digits get leading zeros; longer ones stay as they are.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
  }
2378
2379
  /**
   * Reports whether the "IntlChar" class exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded(): bool
  {
    $available = \class_exists('IntlChar');

    return $available;
  }
2388
2389
  /**
   * Reports whether JSON support is present, detected via the "json_decode" function.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function json_loaded(): bool
  {
    $available = \function_exists('json_decode');

    return $available;
  }
2398
2399
  /**
   * Reports whether the "finfo" class exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function finfo_loaded(): bool
  {
    $available = \class_exists('finfo');

    return $available;
  }
2408
2409
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if loaded, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded(): bool
  {
    $loaded = \extension_loaded('intl');

    return $loaded;
  }
2418
2419
  /**
   * Reports whether the "ctype" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if loaded, <strong>false</strong> otherwise.</p>
   */
  public static function ctype_loaded(): bool
  {
    $loaded = \extension_loaded('ctype');

    return $loaded;
  }
2428
2429
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    $result = self::is_ascii($str);

    return $result;
  }
2444
2445
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64(string $str): bool
  {
    $result = self::is_base64($str);

    return $result;
  }
2460
2461
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str
   * @param bool  $strict <p>Forwarded unchanged as the second argument of is_binary().</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    $result = self::is_binary($str, $strict);

    return $result;
  }
2477
2478
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2493
2494
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    $result = self::is_html($str);

    return $result;
  }
2509
2510
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    $result = self::is_json($str);

    return $result;
  }
2525
2526
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param string $str
   *
   * @return int|false <p>false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16(string $str)
  {
    $result = self::is_utf16($str);

    return $result;
  }
2541
2542
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param string $str
   *
   * @return int|false <p>false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32(string $str)
  {
    $result = self::is_utf32($str);

    return $result;
  }
2557
2558
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str
   * @param bool   $strict <p>Forwarded unchanged as the second argument of is_utf8().</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2574
2575
  /**
   * Tests the string against the pattern "^[[:alpha:]]*$" (alphabetic chars only).
   *
   * The check itself is delegated to self::matchesPattern().
   *
   * @param string $str
   *
   * @return bool <p>Whether or not $str contains only alphabetic chars.</p>
   */
  public static function is_alpha(string $str): bool
  {
    $pattern = '^[[:alpha:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2586
2587
  /**
   * Tests the string against the pattern "^[[:alnum:]]*$" (alphabetic and numeric chars only).
   *
   * The check itself is delegated to self::matchesPattern().
   *
   * @param string $str
   *
   * @return bool <p>Whether or not $str contains only alphanumeric chars.</p>
   */
  public static function is_alphanumeric(string $str): bool
  {
    $pattern = '^[[:alnum:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2598
2599
  /**
   * Checks whether a string consists only of the ASCII bytes this helper accepts.
   *
   * Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the range 0x20-0x7E.
   * Any other byte (including 0x00 and 0x7F) makes the check fail.
   *
   * NOTE(review): 0x10 and 0x13 look like decimal 10/13 (LF/CR) written as hex by mistake;
   * left unchanged here - confirm before tightening the character class.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII (the empty string counts as ASCII)<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    $hasOtherByte = (bool)\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasOtherByte;
  }
2617
2618
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and re-encoded; it counts as base64 only
   * when that round trip reproduces the input byte for byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded. An input that decodes to nothing
   *              (e.g. the empty string) yields <strong>false</strong>.</p>
   */
  public static function is_base64(string $str): bool
  {
    $decoded = \base64_decode($str, true);

    // Compare explicitly instead of relying on truthiness: the decoded payload "0"
    // (from "MA==") is falsy in PHP and was wrongly reported as "not base64".
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
2631
2632
  /**
   * Heuristically decides whether the input is binary data.
   *
   * Checks, in order:
   * 1. an empty input (after string cast) is not binary,
   * 2. a string made up only of the characters "0" and "1" is binary,
   * 3. more than 30% NUL bytes means binary,
   * 4. strict mode: binary only if finfo reports the MIME encoding "binary",
   * 5. non-strict mode: binary if at least one NUL byte is present.
   *
   * @param mixed $input  <p>Cast to string before any check.</p>
   * @param bool  $strict <p>Use finfo (ext-fileinfo) instead of the plain NUL-byte test.</p>
   *
   * @throws \RuntimeException in strict mode when self::$SUPPORT['finfo'] is false
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $bytes = (string)$input;
    if ($bytes === '') {
      return false;
    }

    if (\preg_match('~^[01]+$~', $bytes)) {
      return true;
    }

    // The length is at least 1 here, so the ratio is always well defined.
    $nulCount = \substr_count($bytes, "\x0");
    if (($nulCount / \strlen($bytes)) > 0.3) {
      return true;
    }

    if ($strict !== true) {
      return $nulCount > 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(FILEINFO_MIME_ENCODING);

    return $detector->buffer($bytes) === 'binary';
  }
2685
2686
  /**
   * Check if the file is binary.
   *
   * Reads at most the first 512 bytes of the file and passes them to
   * self::is_binary() in strict mode. If the file cannot be opened or read,
   * an empty block is checked instead.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $block = '';

    // fopen() reports failure by returning false, not by throwing an \Exception,
    // so the handle has to be checked before it is used.
    $fp = \fopen($file, 'rb');
    if (\is_resource($fp)) {
      try {
        $chunk = \fread($fp, 512);
      } finally {
        // Always release the handle, even if the read fails.
        \fclose($fp);
      }

      if (\is_string($chunk)) {
        $block = $chunk;
      }
    }

    return self::is_binary($block, true);
  }
2705
2706
  /**
   * Tests the string against the pattern "^[[:space:]]*$" (whitespace chars only).
   *
   * The check itself is delegated to self::matchesPattern().
   *
   * @param string $str
   *
   * @return bool <p>Whether or not $str contains only whitespace characters.</p>
   */
  public static function is_blank(string $str): bool
  {
    $pattern = '^[[:space:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2717
2718
  /**
   * Checks if the given string is exactly equal to one of the known "Byte Order Mark" strings.
   *
   * The known BOMs are the keys of self::$BOM; the comparison is strict (===).
   *
   * WARNING: Use "UTF8::string_has_bom()" if you want to check for a BOM inside a longer string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str): bool
  {
    // Only the keys matter here; the byte lengths stored as values are not used.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
2737
2738
  /**
   * Determine whether the value is considered to be empty.
   *
   * Follows PHP's empty() rules for an existing variable: the value is empty
   * when it converts to boolean FALSE ('', '0', 0, null, [] ...).
   *
   * @param mixed $str
   *
   * @return bool <p>Whether or not $str is empty().</p>
   */
  public static function is_empty($str): bool
  {
    // $str is always defined inside the method, so empty() reduces to a falsiness test.
    $asBool = (bool)$str;

    return $asBool === false;
  }
2752
2753
  /**
   * Tests the string against the pattern "^[[:xdigit:]]*$" (hexadecimal chars only).
   *
   * The check itself is delegated to self::matchesPattern().
   *
   * @param string $str
   *
   * @return bool <p>Whether or not $str contains only hexadecimal chars.</p>
   */
  public static function is_hexadecimal(string $str): bool
  {
    $pattern = '^[[:xdigit:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2764
2765
  /**
   * Check if the string contains anything that looks like an html-tag (e.g. "<b>", "</p>", "<br />").
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if at least one tag-like sequence was found.</p>
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Opening or closing tag name, optional attributes (bare, quoted or unquoted values), optional "/".
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    $found = [];
    \preg_match($tagPattern, $str, $found);

    return \count($found) > 0;
  }
2785
2786
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only JSON that decodes to an object or an array counts; scalar JSON
   * values (numbers, strings, true/false/null) yield false.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException when self::$SUPPORT['json'] is false
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    $isContainer = \is_object($decoded) === true || \is_array($decoded) === true;
    if ($isContainer === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
2818
2819
  /**
   * Tests the string against the pattern "^[[:lower:]]*$" (lower case chars only).
   *
   * The check itself is delegated to self::matchesPattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str contains only lower case characters.</p>
   */
  public static function is_lowercase(string $str): bool
  {
    $matched = self::matchesPattern($str, '^[[:lower:]]*$');

    return (bool)$matched;
  }
2832
2833
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The string is probed with unserialize(). Because unserialize() returns false both on
   * failure and for a serialized boolean false, the literal 'b:0;' is recognised separately.
   *
   * SECURITY: the probe runs with "allowed_classes => false", so no user-land object is
   * instantiated (no __wakeup()/__destruct() side effects) while testing untrusted input.
   * Serialized objects are still reported as serialized.
   *
   * @param string $str
   *
   * @return bool <p>Whether or not $str is serialized.</p>
   */
  public static function is_serialized(string $str): bool
  {
    if (!isset($str[0])) {
      return false;
    }

    if ($str === 'b:0;') {
      return true;
    }

    // "@" is intentional: unserialize() raises a notice for every non-serialized input.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection UnserializeExploitsInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
2852
2853
  /**
   * Tests the string against the pattern "^[[:upper:]]*$" (upper case chars only).
   *
   * The check itself is delegated to self::matchesPattern().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str contains only upper case characters.</p>
   */
  public static function is_uppercase(string $str): bool
  {
    $pattern = '^[[:upper:]]*$';

    return self::matchesPattern($str, $pattern);
  }
2865
2866
  /**
   * Check if the string is UTF-16.
   *
   * Heuristic: the input must first pass self::is_binary(). After the BOM is removed it is
   * converted UTF-16LE -> UTF-8 -> UTF-16LE -> UTF-8 (and the same for UTF-16BE). For each
   * byte order whose round trip is stable, a score is built from the entries of
   * count_chars() of the converted text that are found (strictly) in count_chars() of the
   * input. The byte order with the higher score wins; equal scores mean "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16(string $str)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input, computed lazily and shared by both passes.
    $inputChars = [];

    // Little endian is evaluated first, then big endian.
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backToUtf16 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($asUtf8Again !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      foreach (\array_keys(self::count_chars($asUtf8Again, true)) as $char) {
        if (\in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
2932
2933
  /**
   * Check if the string is UTF-32.
   *
   * Heuristic: the input must first pass self::is_binary(). After the BOM is removed it is
   * converted UTF-32LE -> UTF-8 -> UTF-32LE -> UTF-8 (and the same for UTF-32BE). For each
   * byte order whose round trip is stable, a score is built from the entries of
   * count_chars() of the converted text that are found (strictly) in count_chars() of the
   * input. The byte order with the higher score wins; equal scores mean "not UTF-32".
   *
   * @param string $str
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32(string $str)
  {
    if (self::is_binary($str) === false) {
      return false;
    }

    $str = self::remove_bom($str);

    // Character statistics of the input, computed lazily and shared by both passes.
    $inputChars = [];

    // Little endian is evaluated first, then big endian.
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backToUtf32 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($asUtf8Again !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      foreach (\array_keys(self::count_chars($asUtf8Again, true)) as $char) {
        if (\in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
2999
3000
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * An array is valid when every element is valid (checked recursively); the empty string is valid.
   *
   * Validation strategy:
   * - if PCRE can handle the /u modifier, the PCRE engine validates the subject;
   * - otherwise a byte-wise state machine (based on the code referenced below) is used.
   *   It rejects illegal lead bytes, broken or truncated continuation sequences, non-shortest
   *   forms, 5/6 byte sequences, surrogates and code points above U+10FFFF.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // The shortcut is only usable when PCRE supports UTF-8. The condition used to be
    // inverted, so the /u pattern ran exactly on the systems where it cannot work.
    // NOTE(review): assumes pcre_utf8_support() returns true when /u is available - confirm.
    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state means the input ended in the middle of a multi-octet
    // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3158
3159
  /**
   * Decodes a JSON string.
   *
   * The input is first passed through self::filter(); the result is then handed to
   * PHP's native json_decode() together with the remaining arguments.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded.
   *                        </p>
   *                        <p>
   *                        This function only works with UTF-8 encoded strings.
   *                        </p>
   *                        <p>PHP implements a superset of
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                        only supports these values when they are nested inside an array or an object.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, passed to json_decode() unchanged.
   *                        </p>
   *
   * @throws \RuntimeException when self::$SUPPORT['json'] is false
   *
   * @return mixed <p>The value encoded in <i>json</i> in appropriate PHP type.
   *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the
   *               encoded data is deeper than the recursion limit.</p>
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filtered = self::filter($json);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filtered, $assoc, $depth, $options);
  }
3212
3213
  /**
3214
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3215
   * Returns the JSON representation of a value.
3216
   *
3217
   * @link http://php.net/manual/en/function.json-encode.php
3218
   *
3219
   * @param mixed $value   <p>
3220
   *                       The <i>value</i> being encoded. Can be any type except
3221
   *                       a resource.
3222
   *                       </p>
3223
   *                       <p>
3224
   *                       All string data must be UTF-8 encoded.
3225
   *                       </p>
3226
   *                       <p>PHP implements a superset of
3227
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3228
   *                       only supports these values when they are nested inside an array or an object.
3229
   *                       </p>
3230
   * @param int   $options [optional] <p>
3231
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3232
   *                       <b>JSON_HEX_TAG</b>,
3233
   *                       <b>JSON_HEX_AMP</b>,
3234
   *                       <b>JSON_HEX_APOS</b>,
3235
   *                       <b>JSON_NUMERIC_CHECK</b>,
3236
   *                       <b>JSON_PRETTY_PRINT</b>,
3237
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3238
   *                       <b>JSON_FORCE_OBJECT</b>,
3239
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3240
   *                       constants is described on
3241
   *                       the JSON constants page.
3242
   *                       </p>
3243
   * @param int   $depth   [optional] <p>
3244
   *                       Set the maximum depth. Must be greater than zero.
3245
   *                       </p>
3246
   *
3247
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3248
   */
3249 2 View Code Duplication
  public static function json_encode($value, int $options = 0, int $depth = 512): string
  {
    // Run the input through the class' own filter before it is encoded.
    $filtered = self::filter($value);

    // Detect the available extensions lazily, on first use.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // Without ext-json there is nothing this wrapper can delegate to.
    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    // NOTE(review): \json_encode() returns false on failure, which does not
    // satisfy the declared "string" return type - confirm the intended handling.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filtered, $options, $depth);
  }
3266
3267
  /**
3268
   * Returns the last $n characters of the string.
3269
   *
3270
   * @param string $str      <p>The input string.</p>
3271
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
3272
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3273
   *
3274
   * @return string
3275
   */
3276
  public static function last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A non-positive character count selects nothing.
    if ($n <= 0) {
      return '';
    }

    // Other methods of this class treat self::substr() as "string|false";
    // cast the result so the declared string return type always holds.
    return (string)self::substr($str, -$n, null, $encoding);
  }
3284
3285
  /**
3286
   * Makes string's first char lowercase.
3287
   *
3288
   * @param string $str       <p>The input string</p>
3289
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
3290
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3291
   *
3292
   * @return string <p>The resulting string</p>
3293
   */
3294 7
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character is kept untouched.
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // Only the first character is lower-cased.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $loweredFirstChar = self::strtolower($firstChar, $encoding, $cleanUtf8);

    // self::substr() may report "false"; treat that as "no remainder".
    return $loweredFirstChar . ($rest === false ? '' : $rest);
  }
3309
3310
  /**
3311
   * alias for "UTF8::lcfirst()"
3312
   *
3313
   * @see UTF8::lcfirst()
3314
   *
3315
   * @param string $word
3316
   * @param string $encoding
3317
   * @param bool   $cleanUtf8
3318
   *
3319
   * @return string
3320
   */
3321 1
  public static function lcword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: forwards all arguments unchanged to self::lcfirst().
    return self::lcfirst($word, $encoding, $cleanUtf8);
  }
3325
3326
  /**
3327
   * Lowercase for all words in the string.
3328
   *
3329
   * @param string   $str        <p>The input string.</p>
3330
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3331
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3332
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3333
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3334
   *
3335
   * @return string
3336
   */
3337 1
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against the empty string explicitly: a plain truthiness check
    // would also discard the valid input "0".
    if ($str === '') {
      return '';
    }

    // Skip the exception lookup entirely when no exceptions were supplied.
    $useExceptions = \count($exceptions) > 0;

    $newWords = [];
    foreach (self::str_to_words($str, $charlist) as $word) {
      // Only genuinely empty fragments are skipped; the word "0" must be kept.
      // (assumes self::str_to_words() yields strings - TODO confirm)
      if ((string)$word === '') {
        continue;
      }

      // Lower-case the first char unless the word is listed as an exception.
      if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3375
3376
  /**
3377
   * Returns the longest common prefix between the string and $otherStr.
3378
   *
3379
   * @param string $str      <p>The input string.</p>
3380
   * @param string $otherStr <p>Second string for comparison.</p>
3381
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3382
   *
3383
   * @return string
3384
   */
3385
  public static function longestCommonPrefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The common prefix can never be longer than the shorter string.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonPrefix = '';
    for ($i = 0; $i < $maxLength; $i++) {
      // Cast both sides to string and compare strictly, so a "false" result
      // from self::substr() can never loosely equal a character such as "0".
      $char = (string)self::substr($str, $i, 1, $encoding);

      if ($char !== (string)self::substr($otherStr, $i, 1, $encoding)) {
        break;
      }

      $longestCommonPrefix .= $char;
    }

    return $longestCommonPrefix;
  }
3402
3403
  /**
3404
   * Returns the longest common substring between the string and $otherStr.
3405
   * In the case of ties, it returns that which occurs first.
3406
   *
3407
   * @param string $str
3408
   * @param string $otherStr <p>Second string for comparison.</p>
3409
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3410
   *
3411
   * @return string <p>The longest common substring.</p>
3412
   */
3413
  public static function longestCommonSubstring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per outer iteration.
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = (string)self::substr($otherStr, $j - 1, 1, $encoding);
    }

    // $len: length of the best match so far; $end: 1-based end position of it in $str.
    $len = 0;
    $end = 0;

    // $table[$i][$j]: length of the common run ending at $str[$i - 1] / $otherStr[$j - 1].
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // Loop-invariant for the inner loop, so it is fetched only once per row.
      $strChar = (string)self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar !== $otherChars[$j]) {
          $table[$i][$j] = 0;

          continue;
        }

        $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

        // Strictly greater: on ties the first occurrence wins.
        if ($table[$i][$j] > $len) {
          $len = $table[$i][$j];
          $end = $i;
        }
      }
    }

    // Cast: self::substr() is treated as "string|false" elsewhere in this class,
    // but this method promises a string.
    return (string)self::substr(
        $str,
        $end - $len,
        $len,
        $encoding
    );
  }
3457
3458
  /**
3459
   * Returns the longest common suffix between the string and $otherStr.
3460
   *
3461
   * @param string $str
3462
   * @param string $otherStr <p>Second string for comparison.</p>
3463
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3464
   *
3465
   * @return string
3466
   */
3467
  public static function longestCommonSuffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The common suffix can never be longer than the shorter string.
    $maxLength = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $longestCommonSuffix = '';
    for ($i = 1; $i <= $maxLength; $i++) {
      // Walk backwards from the end; cast and compare strictly so a "false"
      // result from self::substr() can never loosely equal a character such as "0".
      $char = (string)self::substr($str, -$i, 1, $encoding);

      if ($char !== (string)self::substr($otherStr, -$i, 1, $encoding)) {
        break;
      }

      // Prepend, because the characters are visited in reverse order.
      $longestCommonSuffix = $char . $longestCommonSuffix;
    }

    return $longestCommonSuffix;
  }
3484
3485
  /**
3486
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3487
   *
3488
   * @param string $str   <p>The string to be trimmed</p>
3489
   * @param mixed  $chars <p>Optional characters to be stripped</p>
3490
   *
3491
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3492
   */
3493 8 View Code Duplication
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // Fall back to the default pattern (separator and "other" categories) when no
    // char list was given. The string "0" is falsy in PHP but is a valid
    // one-character list, so it must not trigger the fallback.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      $pattern = "^[\pZ\pC]+";
    } else {
      // Escape the list so it is safe inside a character class with "/" as delimiter.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
3509
3510
  /**
3511
   * Returns true if $str matches the supplied pattern, false otherwise.
3512
   *
3513
   * @param string $str     <p>The input string.</p>
3514
   * @param string $pattern <p>Regex pattern to match against.</p>
3515
   *
3516
   * @return bool <p>Whether or not $str matches the pattern.</p>
3517
   */
3518
  public static function matchesPattern(string $str, string $pattern): bool
  {
    // The caller supplies the bare pattern; the "/" delimiters and the "u" modifier are added here.
    $regex = '/' . $pattern . '/u';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only a match is reported as true.
    return (bool)\preg_match($regex, $str);
  }
3526
3527
  /**
3528
   * Returns the UTF-8 character with the maximum code point in the given data.
3529
   *
3530
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3531
   *
3532
   * @return string <p>The character with the highest code point.</p>
3533
   */
3534 1 View Code Duplication
  public static function max($arg): string
  {
    // An array of strings is treated as one concatenated string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \max() must not be called with an empty array (warning + false on PHP 7,
    // ValueError on PHP 8); without any code point there is no character to return.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\max($codepoints));
  }
3542
3543
  /**
3544
   * Calculates and returns the maximum number of bytes taken by any
3545
   * UTF-8 encoded character in the given string.
3546
   *
3547
   * @param string $str <p>The original Unicode string.</p>
3548
   *
3549
   * @return int <p>Max byte lengths of the given chars.</p>
3550
   */
3551 1
  public static function max_chr_width(string $str): int
  {
    // Byte length of every character, as reported by self::chr_size_list().
    $sizes = self::chr_size_list($str);

    // No characters at all: report a width of zero.
    if (\count($sizes) <= 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
3560
3561
  /**
3562
   * Checks whether mbstring is available on the server.
3563
   *
3564
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3565
   */
3566 12
  public static function mbstring_loaded(): bool
  {
    $isLoaded = \extension_loaded('mbstring');

    // Side effect: when the extension is present, its internal encoding is switched to UTF-8.
    if ($isLoaded) {
      \mb_internal_encoding('UTF-8');
    }

    return $isLoaded;
  }
3576
3577 1
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // Without the constant there is no overload flag that could be tested.
    /** @noinspection PhpComposerExtensionStubsInspection */
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    // True when the string-function overload bit is set in the ini value.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3589
3590
  /**
3591
   * Returns the UTF-8 character with the minimum code point in the given data.
3592
   *
3593
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3594
   *
3595
   * @return string <p>The character with the lowest code point.</p>
3596
   */
3597 1 View Code Duplication
  public static function min($arg): string
  {
    // An array of strings is treated as one concatenated string.
    if (\is_array($arg) === true) {
      $arg = \implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // \min() must not be called with an empty array (warning + false on PHP 7,
    // ValueError on PHP 8); without any code point there is no character to return.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(\min($codepoints));
  }
3605
3606
  /**
3607
   * alias for "UTF8::normalize_encoding()"
3608
   *
3609
   * @see        UTF8::normalize_encoding()
3610
   *
3611
   * @param string $encoding
3612
   * @param mixed  $fallback
3613
   *
3614
   * @return mixed
3615
   *
3616
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3617
   */
3618 1
  public static function normalizeEncoding(string $encoding, $fallback = '')
  {
    // Deprecated alias: forwards both arguments unchanged to self::normalize_encoding().
    return self::normalize_encoding($encoding, $fallback);
  }
3622
3623
  /**
3624
   * Normalize the encoding-"name" input.
3625
   *
3626
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3627
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3628
   *
3629
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by
3630
   *                default)</p>
3631
   */
3632 27
  public static function normalize_encoding(string $encoding, $fallback = '')
  {
    // Per-request memo: maps an encoding name as passed in to its normalized form.
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // No (or a falsy) encoding name: hand back the caller's fallback untouched.
    if (!$encoding) {
      return $fallback;
    }

    // Fast path for the most common input; it bypasses the cache and the lookups below.
    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Load the list of known encoding names lazily, on first use.
    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // An exact (case-sensitive) hit in the known list is already normalized.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    // Keep the original spelling as cache key, then build a lookup key:
    // upper-cased and stripped of everything except letters, digits and whitespace.
    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    // Alias (in lookup-key form) => canonical encoding name.
    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Known alias: use its canonical name. Otherwise the upper-cased input is returned as-is.
    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Remember the result under the original spelling for subsequent calls.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3740
3741
  /**
3742
   * Normalize some MS Word special characters.
3743
   *
3744
   * @param string $str <p>The string to be normalized.</p>
3745
   *
3746
   * @return string
3747
   */
3748 16 View Code Duplication
  public static function normalize_msword(string $str): string
  {
    if ($str === '') {
      return '';
    }

    // Search/replace lists derived from the MS Word table, built once per request.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      // The table itself is loaded lazily through self::getData().
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      // Keys are the characters to look for, values are their replacements.
      $searchCache = \array_keys(self::$UTF8_MSWORD);
      $replaceCache = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($searchCache, $replaceCache, $str);
  }
3769
3770
  /**
3771
   * Normalize the whitespace.
3772
   *
3773
   * @param string $str                     <p>The string to be normalized.</p>
3774
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3775
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3776
   *                                        bidirectional text chars.</p>
3777
   *
3778
   * @return string
3779
   */
3780 39
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ($str === '') {
      return '';
    }

    // Per-request caches: one whitespace list per $keepNonBreakingSpace variant (0 / 1)
    // and a single list of bidirectional control characters.
    static $whitespaceLists = [];
    static $bidiControls = null;

    $variant = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceLists[$variant])) {
      $table = self::$WHITESPACE_TABLE;

      // Leave non-breaking spaces alone when the caller asked for that.
      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceLists[$variant] = \array_values($table);
    }

    // Bidirectional control characters are removed entirely, not replaced by a space.
    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = \str_replace($bidiControls, '', $str);
    }

    // Every remaining listed whitespace character becomes a plain space.
    return \str_replace($whitespaceLists[$variant], ' ', $str);
  }
3812
3813
  /**
3814
   * Calculates Unicode code point of the given UTF-8 encoded character.
3815
   *
3816
   * INFO: opposite to UTF8::chr()
3817
   *
3818
   * @param string $chr      <p>The character of which to calculate code point.</p>
3819
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
3820
   *
3821
   * @return int <p>
3822
   *             Unicode code point of the given character,<br>
3823
   *             0 on invalid UTF-8 byte sequence.
3824
   *             </p>
3825
   */
3826 23
  public static function ord(string $chr, string $encoding = 'UTF-8'): int
  {
    // init
    // Per-request memo of already computed code points, keyed by "<original char><encoding>".
    static $CHAR_CACHE = [];

    // save the original string
    $chr_orig = $chr;

    // Anything other than UTF-8 / CP850 gets its encoding name normalized first
    // (falling back to UTF-8) and, if it is still not UTF-8, is converted to UTF-8.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    // The key uses the unconverted input together with the (possibly normalized) encoding name.
    $cacheKey = $chr_orig . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // Detect the available extensions lazily, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: let ext-intl compute the code point. A falsy result
    // (0 / null) falls through to the manual decoding below.
    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CHAR_CACHE[$cacheKey] = $code;
      }
    }

    // Manual decoding: unpack up to four leading units of the string into a
    // 1-based array of unsigned byte values.
    // NOTE(review): 'CP850' presumably makes self::substr() work byte-wise - confirm.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $chr = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $chr ? $chr[1] : 0;

    // Lead byte >= 0xF0 with four bytes available: decode as a 4-byte sequence.
    if (0xF0 <= $code && isset($chr[4])) {
      return $CHAR_CACHE[$cacheKey] = (($code - 0xF0) << 18) + (($chr[2] - 0x80) << 12) + (($chr[3] - 0x80) << 6) + $chr[4] - 0x80;
    }

    // Lead byte >= 0xE0 with three bytes available: decode as a 3-byte sequence.
    if (0xE0 <= $code && isset($chr[3])) {
      return $CHAR_CACHE[$cacheKey] = (($code - 0xE0) << 12) + (($chr[2] - 0x80) << 6) + $chr[3] - 0x80;
    }

    // Lead byte >= 0xC0 with two bytes available: decode as a 2-byte sequence.
    if (0xC0 <= $code && isset($chr[2])) {
      return $CHAR_CACHE[$cacheKey] = (($code - 0xC0) << 6) + $chr[2] - 0x80;
    }

    // Otherwise the first byte value itself is the result (0 when nothing could be unpacked).
    return $CHAR_CACHE[$cacheKey] = $code;
  }
3879
3880
  /**
3881
   * Parses the string into an array (into the second parameter).
3882
   *
3883
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3884
   *          if the second parameter is not set!
3885
   *
3886
   * @link http://php.net/manual/en/function.parse-str.php
3887
   *
3888
   * @param string $str       <p>The input string.</p>
3889
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
3890
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3891
   *
3892
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3893
   */
3894 1
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    // Optionally strip invalid UTF-8 before parsing.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    // The parsed key/value pairs are written into the by-reference $result.
    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    // Success requires both: the parser did not fail and it produced at least one entry.
    return $parsed !== false && !empty($result);
  }
3905
3906
  /**
3907
   * Checks if \u modifier is available that enables Unicode support in PCRE.
3908
   *
3909
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3910
   */
3911 60
  public static function pcre_utf8_support(): bool
  {
    // Probe with an empty pattern that carries the "u" modifier; warnings are
    // silenced and the preg_match() result is cast to bool.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)@\preg_match('//u', '');
  }
3916
3917
  /**
3918
   * Create an array containing a range of UTF-8 characters.
3919
   *
3920
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3921
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3922
   *
3923
   * @return string[]
3924
   */
3925 1
  public static function range($var1, $var2): array
  {
    // Falsy boundaries (e.g. '', 0, null) yield an empty range.
    if (!$var1 || !$var2) {
      return [];
    }

    // Detect the available extensions lazily, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Shared resolver for both boundaries: decimal digits, then hexadecimal digits,
    // otherwise the code point of the (first) character. The boundary is cast to
    // string once, so the ctype checks and self::ord() always receive a string,
    // even for non-string scalars under strict_types.
    $toCodePoint = static function ($boundary): int {
      $boundary = (string)$boundary;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    // Turn every code point of the numeric range back into its character.
    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
3973
3974
  /**
3975
   * Multi decode html entity & fix urlencoded-win1252-chars.
3976
   *
3977
   * e.g:
3978
   * 'test+test'                     => 'test+test'
3979
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3980
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3981
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3982
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3983
   * 'DÃ¼sseldorf'                  => 'Düsseldorf'
3984
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3985
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3986
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3987
   *
3988
   * @param string $str          <p>The input string.</p>
3989
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3990
   *
3991
   * @return string
3992
   */
3993 2 View Code Duplication
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ($str === '') {
      return '';
    }

    // Rewrite "%uXXXX" escapes into HTML hex entities so the entity decoder below resolves them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // One pass = to-UTF-8, entity decode, URL decode, simple UTF-8 fix.
    // With $multi_decode the pass is repeated until the string stops changing.
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4022
4023
  /**
4024
   * @param array $strings
4025
   * @param bool  $removeEmptyValues
4026
   * @param int   $removeShortValues
4027
   *
4028
   * @return array
4029
   */
4030 1
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    // Collects the surviving entries, re-indexed from zero.
    $kept = [];

    foreach ($strings as $candidate) {
      // Drop entries whose length does not exceed the given threshold (if one was given).
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($candidate) <= $removeShortValues;

      // Drop entries that are empty after trimming (if requested).
      $isBlank = $isTooShort === false
                 &&
                 $removeEmptyValues === true
                 &&
                 \trim($candidate) === '';

      if ($isTooShort === false && $isBlank === false) {
        $kept[] = $candidate;
      }
    }

    return $kept;
  }
4057
4058
  /**
4059
   * Replaces all occurrences of $pattern in $str by $replacement.
4060
   *
4061
   * @param string $str         <p>The input string.</p>
4062
   * @param string $pattern     <p>The regular expression pattern.</p>
4063
   * @param string $replacement <p>The string to replace with.</p>
4064
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
4065
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
4066
   *
4067
   * @return string
4068
   */
4069 35
  public static function regexReplace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
4070
  {
4071 35
    if ($options === 'msr') {
4072
      $options = 'ms';
4073
    }
4074
4075
    // fallback
4076 35
    if (!$delimiter) {
4077
      $delimiter = '/';
4078
    }
4079
4080 35
    $str = (string)\preg_replace(
4081 35
        $delimiter . $pattern . $delimiter . 'u' . $options,
4082 35
        $replacement,
4083 35
        $str
4084
    );
4085
4086 35
    return $str;
4087
  }
4088
4089
  /**
   * Alias of "UTF8::remove_bom()", the call is forwarded unchanged.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4104
4105
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every signature listed in "self::$BOM" is tested against the start of the
   * (possibly already shortened) string and cut off when it matches.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom(string $str): string
  {
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $signature => $signatureLength) {
      // NOTE(review): "CP850" is presumably passed to get byte-oriented handling — confirm in strpos() / substr()
      if (self::strpos($str, $signature, 0, 'CP850') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $signatureLength, null, 'CP850');

      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4130
4131
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what is collapsed into a single $what.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // A callback is used so that $item is inserted literally: as a plain
        // replacement string, sequences like "$0", "\1" or "\\" inside $item
        // would be interpreted by PCRE instead of being copied.
        $str = (string)\preg_replace_callback(
            '/(' . \preg_quote($item, '/') . ')+/',
            static function (array $match) use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4154
4155
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the percent-encoded form (e.g. "%00") of these characters.</p>
   * @param string $replacement <p>The string that is inserted for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding are case-insensitive, so "%0B" must be caught like "%0b"
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore: a removal can join the
    // surrounding characters into a new sequence that has to go as well
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4188
4189
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Also run the string through a UTF-8 to UTF-8 conversion,
   *                                   with $replacementChar (or "none" if it is empty) as substitute character.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the substitute character is a global mbstring setting: remember it and restore it afterwards
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $converted = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);

      $str = \is_string($converted) ? $converted : '';
    }

    return str_replace(["\xEF\xBF\xBD", '�'], $replacementChar, $str);
  }
4238
4239
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without a character list, every trailing character of the unicode
   * categories "Z" (separators) and "C" (other / control) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ($str === '') {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid character list and must not fall back to the default
    $useDefaultClass = $chars === INF
                       ||
                       ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultClass) {
      $pattern = '[\pZ\pC]+$';
    } else {
      $pattern = '[' . \preg_quote($chars, '/') . ']+$';
    }

    return self::regexReplace($str, $pattern, '', '', '/');
  }
4263
4264
  /**
   * Builds a regular expression fragment out of the characters of $s.
   *
   * Characters are collected into one bracket expression (seeded with $class);
   * anything that does not go into the bracket expression becomes a separate
   * alternative of a non-capturing group. Results are cached per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>The regex fragment, without delimiters.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $FRAGMENT_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($FRAGMENT_CACHE[$cacheKey])) {
      return $FRAGMENT_CACHE[$cacheKey];
    }

    // index 0 holds the bracket expression content, further entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen is moved to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes long
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $fragment = (1 === \count($parts))
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $FRAGMENT_CACHE[$cacheKey] = $fragment;

    return $fragment;
  }
4312
4313
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of "self::$SUPPORT" as "key - value" inside a "<pre>" block.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $report .= '</pre>';

    echo $report;
  }
4328
4329
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // the two known names are used as they are, everything else is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4358
4359
  /**
   * Replaces every run of $tabLength spaces by one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength <p>Number of spaces that make up one tab.</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4369
4370
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected via regex, otherwise
   * the bytes are scanned manually for 1- to 4-byte UTF-8 sequences.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split(string $str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($str === '') {
      return [];
    }

    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: manual scan of the bytes

      // NOTE(review): same check as above, kept to preserve the original control flow
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {

        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $chars[] = $lead;

        } elseif (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx: lead byte of a 2-byte sequence, only kept with a valid continuation byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

        } elseif (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx: lead byte of a 3-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

        } elseif (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx: lead byte of a 4-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }

        }
        // any other byte is skipped
      }
    }

    if ($length > 1) {
      // glue groups of $length characters back together
      return \array_map(
          function ($group) {
            return \implode('', $group);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4487
4488
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains $needle.</p>
   */
  public static function str_contains(string $haystack, string $needle, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
4508
4509
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty $haystack or an empty $needles array always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains every needle.</p>
   */
  public static function str_contains_all(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needles === []) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if (!$found) {
        return false;
      }
    }

    return true;
  }
4539
4540
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty $needles array always yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return bool <p>Whether or not $haystack contains at least one needle.</p>
   */
  public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($needles === []) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found) {
        return true;
      }
    }

    return false;
  }
4566
4567
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed list and finally an "iconv()" round-trip
   * for every encoding of the "encodings" data set.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   *                      </p>
   */
  public static function str_detect_encoding(string $str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // each detector runs once, its result is checked for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // the data set is only usable if it really is a list
    if (\is_array(self::$ENCODINGS)) {
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

        // "iconv()" returns false on failure; the strict comparison covers that
        // case, passing false on to "md5()" would raise a TypeError under strict_types
        if ($roundTrip === $str) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
4685
4686
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is case-sensitive and works on bytes. An empty $haystack
   * or an empty $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
4702
4703
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str ends with one of the substrings.</p>
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $suffix) {
      $matches = self::str_ends_with($str, $suffix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4727
4728
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    if (self::str_starts_with($str, $substring)) {
      return $str;
    }

    return $substring . $str;
  }
4745
4746
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    if (self::str_ends_with($str, $substring)) {
      return $str;
    }

    return $str . $substring;
  }
4762
4763
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of $haystack is cut off with the byte length of $needle and then
   * compared via "UTF8::strcasecmp()". An empty $haystack or an empty $needle
   * always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4783
4784
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str ends with one of the substrings.</p>
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $suffix) {
      $matches = self::str_iends_with($str, $suffix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4808
4809
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged if $index is greater than its length.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $length = self::strlen($str, $encoding);

    if ($index > $length) {
      return $str;
    }

    $head = self::substr($str, 0, $index, $encoding);
    $tail = self::substr($str, $index, $length, $encoding);

    return $head . $substring . $tail;
  }
4832
4833
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search values are turned into case-insensitive UTF-8 regular expressions,
   * the replacement values are always inserted literally.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), paired with the search
   *                       values in the order of "preg_replace()".
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      // an empty needle becomes a pattern that can never match
      $patterns[$key] = ($needle === '')
          ? '/^(?<=.)$/'
          : '/' . \preg_quote($needle, '/') . '/ui';
    }

    // "preg_replace()" interprets "$n", "\n" and "\\" in the replacement;
    // escape them so that the text is inserted as-is, like "str_ireplace()" does
    $escape = static function ($value) {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $replaced = 0;
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4876
4877
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    return self::stripos($haystack, $needle) === 0;
  }
4897
4898
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * An empty $str or an empty $substrings array always yields false.
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the substrings.</p>
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || $substrings === []) {
      return false;
    }

    foreach ($substrings as $prefix) {
      $matches = self::str_istarts_with($str, $prefix);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
4926
4927
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * A string that is longer than $length is cut down to $length characters, the
   * last (possibly incomplete) word is dropped and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max number of characters before $strAddOn is added.</p>
   * @param string $strAddOn <p>Suffix that marks a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…'): string
  {
    if ($str === '') {
      return '';
    }

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly on a space: cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // drop the last word, it may have been cut in the middle
    $words = \explode(' ', $truncated);
    $withoutLastWord = \implode(' ', \array_slice($words, 0, -1));

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // nothing is left without the last word: fall back to a hard cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4963
4964
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The string is returned unchanged if $pad_length is not positive, if it is
   * smaller than the length of $str or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT): string
  {
    // an empty pad string cannot fill anything and would end in a division by zero below
    if ($pad_string === '') {
      return $str;
    }

    $str_length = self::strlen($str);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // number of characters that have to be added
    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // on an odd difference the extra character goes to the right side
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2));
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
5018
5019
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>The string to be repeated.</p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           Has to be greater than or equal to 0; with 0 an
   *                           empty string is returned.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
5043
5044
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: Thin wrapper around the native "str_replace()", which is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle to look for. An array may be used to
   *                       designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement for found needles. An array may be
   *                       used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack: a string, or an array of strings. With
   *                       an array, every entry is processed and an array is
   *                       returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>A string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
5077
5078
  /**
   * Replace only the first occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the first match replaced, or the unchanged subject if nothing was found.</p>
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
5097
5098
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string (an empty string for empty input).</p>
   */
  public static function str_shuffle(string $str): string
  {
    $length = self::strlen($str);

    // Without this guard range(0, -1) would produce the bogus indexes [0, -1].
    if ($length < 1) {
      return '';
    }

    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    $shuffledStr = '';
    foreach ($indexes as $i) {
      $char = self::substr($str, $i, 1);
      // self::substr() may signal a failure with false (other callers in this
      // class cast or check its result); never append that.
      if ($char !== false) {
        $shuffledStr .= $char;
      }
    }

    return $shuffledStr;
  }
5120
5121
  /**
   * Sort all characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, sort in descending code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
5146
5147
  /**
   * Split a string into an array of chunks.
   *
   * The string is matched against GRAPHEME_CLUSTER_RX and the matches are
   * grouped into chunks of "$len" matches each. Arrays are processed
   * recursively, entry by entry.
   *
   * @param string|string[] $str <p>The input string, or an array of input strings.</p>
   * @param int             $len <p>
   *                             Number of matches per chunk. Values below 1 are
   *                             passed on to the native "str_split()".
   *                             </p>
   *
   * @return string[] <p>The chunks; for array input, an array of chunk arrays with the same keys.</p>
   */
  public static function str_split($str, int $len = 1): array
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if (!isset($str[0])) {
      return [];
    }

    if ($len < 1) {
      return \str_split($str, $len);
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    $chunks = [];
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // every $len-th match opens a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
5195
5196
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is case-sensitive. An empty haystack or an empty needle
   * always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    return \strpos($haystack, $needle) === 0;
  }
5216
5217
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   * - an empty string or an empty list always yields false
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool <p>Whether or not $str starts with one of the $substrings.</p>
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    if (!isset($str[0]) || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $prefix) {
      if (self::str_starts_with($str, $prefix)) {
        return true;
      }
    }

    return false;
  }
5245
5246
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2 without leading zeros. The conversion is done per hex digit, so it
   * stays exact for inputs of any length ("base_convert()" loses precision on
   * large numbers).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1"; "0" for an empty or all-zero input.</p>
   */
  public static function str_to_binary(string $str): string
  {
    $hexDigitToBits = [
        '0' => '0000',
        '1' => '0001',
        '2' => '0010',
        '3' => '0011',
        '4' => '0100',
        '5' => '0101',
        '6' => '0110',
        '7' => '0111',
        '8' => '1000',
        '9' => '1001',
        'a' => '1010',
        'b' => '1011',
        'c' => '1100',
        'd' => '1101',
        'e' => '1110',
        'f' => '1111',
    ];

    // bin2hex() yields lowercase hex, two digits per byte.
    $bits = \strtr(\bin2hex($str), $hexDigitToBits);

    // Keep the number-like output of the former base_convert() call.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
5259
5260
  /**
   * Split a string into its lines.
   *
   * "\r\n", "\r" and "\n" each count as exactly one line break, so blank
   * lines are kept unless $removeEmptyValues is set.
   *
   * @param string   $str               <p>The input string.</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    // "\r\n" has to come first, otherwise it would be split into two breaks.
    // No "u" modifier needed: both bytes never occur inside a UTF-8 sequence.
    $return = \preg_split('/\r\n|\r|\n/', $str);
    if ($return === false) {
      // keep the declared array return type even if the regex engine fails
      $return = [$str];
    }

    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $return;
    }

    return self::reduce_string_array(
        $return,
        $removeEmptyValues,
        $removeShortValues
    );
  }
5295
5296
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the words themselves captured as delimiters.
   * Without any filtering, the result therefore alternates between the text
   * between words (even indexes) and the words (odd indexes).
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? [] : [''];
    }

    $charList = self::rxClass($charList, '\pL');
    $pattern = "/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u";

    $parts = \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($removeEmptyValues === false && $removeShortValues === null) {
      return $parts;
    }

    return self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);
  }
5336
5337
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5352
5353
  /**
   * Counts number of words in the UTF-8 string.
   *
   * Built on UTF8::str_to_words(), whose result holds the words at the odd indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return string[]|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format !== 1 && $format !== 2) {
      return ($partCount - 1) / 2;
    }

    $words = [];

    if ($format === 1) {
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // $format === 2: key every word by its character offset in the input
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
5396
5397
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
   *       case-folded via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5415
5416
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5433
5434
  /**
   * Case-sensitive string comparison.
   *
   * Both strings are normalized to NFD before the byte-wise comparison. If
   * a string cannot be normalized (e.g. invalid UTF-8), it is compared as it is.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Normalizer::normalize() reports errors with false, which the native
    // strcmp() rejects with a TypeError under strict_types.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
5454
5455
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The string to examine.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start of the examined part of $str.</p>
   * @param int    $length   [optional] <p>Length of the examined part of $str.</p>
   *
   * @return int|null <p>
   *                  The length of the leading segment that contains none of the $charList characters,<br>
   *                  or null if $charList is empty, or if the examined part is empty or can not be extracted.
   *                  </p>
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ($charList === '') {
      return null;
    }

    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }
      $str = (string)$slice;
    }

    if (!isset($str[0])) {
      return null;
    }

    // lazily capture everything in front of the first mask character
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $match)) {
      return self::strlen($match[1]);
    }

    // no mask character at all: the whole string is the segment
    return self::strlen($str);
  }
5489
5490
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5507
5508
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted with UTF8::chr() and the results are concatenated
   * in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
  }
5530
5531
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is tested against every key of the internal BOM table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom(string $str): bool
  {
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5548
5549
  /**
   * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>The input string.</p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
5581
5582
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without any whitespace.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if (!isset($str[0])) {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
5599
5600
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * For UTF-8 with the "intl" extension available "grapheme_stripos()" is
   * used, otherwise "mb_stripos()".
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack or needle is empty).
   *                   </p>
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;

    if (!$useIntl) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
5648
5649
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is case-insensitive.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or if haystack or needle is empty).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare with '' explicitly:
    // a plain truthiness check would also reject the valid needle "0".
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
5724
5725
  /**
   * Get the string length, not the byte-length!
   *
   * Tries, in this order: "mb_strlen()" / "iconv_strlen()" (depending on
   * what is available), "grapheme_strlen()" (UTF-8 + intl only), plain
   * "strlen()" for ASCII-only input and finally a regex based count.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encodings
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return \strlen($str);
      }

      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      if (self::$SUPPORT['iconv'] === true && self::$SUPPORT['mbstring'] === false) {
        $length = \iconv_strlen($str, $encoding);
        if ($length !== false) {
          return $length;
        }
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    // fallback via vanilla php
    \preg_match_all('/./us', $str, $chars);
    $length = \count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5830
5831
  /**
   * Get string length in byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The number of bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    // Same lazy initialisation as the other methods that read self::$SUPPORT.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // with function overloading flagged, count via a single-byte encoding
      return \mb_strlen($str, 'CP850'); // 8-BIT
    }

    return \strlen($str);
  }
5848
5849
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are
   *       case-folded via UTF8::strtocasefold() and handed to UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5865
5866
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp(); identical strings are
   *       reported as equal right away, everything else goes through
   *       UTF8::strtonatfold() and the native "strnatcmp()".
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5884
5885
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded via UTF8::strtocasefold() and handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len): int
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5902
5903
  /**
   * Compare the first n characters of two strings.
   *
   * Each input is truncated with UTF8::substr() (a false result is cast to
   * an empty string) and the two prefixes are compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    $prefixFirst = (string)self::substr($str1, 0, $len);
    $prefixSecond = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefixFirst, $prefixSecond);
  }
5923
5924
  /**
   * Search a string for any of a set of characters.
   *
   * The character list is turned into a regex fragment by UTF8::rxClass() and
   * matched in UTF-8 mode; the haystack is returned from the first match on.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    // nothing to search in, or nothing to search for
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $matches = [];
    $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $matches);
    if (!$found) {
      return false;
    }

    // byte position of the matched text marks the start of the returned tail
    $bytePosition = \strpos($haystack, $matches[0]);

    return \substr($haystack, $bytePosition);
  }
5946
5947
  /**
   * Find position of first occurrence of string in a string.
   *
   * The lookup is delegated to the first usable backend, in this order:
   * plain \strpos() for "CP850", \iconv_strpos() for non-UTF-8 encodings when
   * mbstring is missing, \mb_strpos(), \grapheme_strpos() (UTF-8 only),
   * \iconv_strpos(), plain \strpos() for pure ASCII input and finally a
   * byte-search whose result is converted back into a character position.
   * A backend that reports "not found" lets the next one try.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle:
    // a non-negative int is treated as a code point (same contract as UTF8::strrpos())
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is handled as a plain 8-bit search
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // translate a negative offset (counted from the end) into the equivalent
      // absolute character index, so the returned position stays relative to
      // the start of the original haystack
      $offset = \max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte-wise search inside the remaining part ...
    $pos = \strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // ... then convert the byte position into a character position
    return $offset + self::strlen(\substr($haystackTmp, 0, $pos));
  }
6086
6087
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around \mb_strrchr(): the encoding name is normalized
   * (unless it is already "UTF-8" or "CP850") and both strings are optionally
   * cleaned before the call.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $isBuiltinEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isBuiltinEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences from both inputs before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // delegate to the "mb_"-function (the original notes a polyfill may provide it)
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
6123
6124
  /**
   * Reverses characters order in the string.
   *
   * Walks the string from its last character index down to 0 and appends
   * each single character (fetched with UTF8::substr()) to the result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with characters in the reverse sequence.</p>
   */
  public static function strrev(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $result = '';
    for ($index = self::strlen($str) - 1; $index >= 0; --$index) {
      $result .= self::substr($str, $index, 1);
    }

    return $result;
  }
6145
6146
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around \mb_strrichr(): the encoding name is normalized
   * (unless it is already "UTF-8" or "CP850") and both strings are optionally
   * cleaned before the call.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $isBuiltinEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isBuiltinEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences from both inputs before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
6181
6182
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Delegates to \mb_strripos() when mbstring is available, otherwise to
   * \grapheme_strripos() (UTF-8 only), otherwise to UTF8::strrpos() on
   * upper-cased copies of both strings.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // a non-negative int needle is treated as a code point
    // (same contract as UTF8::strrpos()); everything else is used as a string
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: case-sensitive search on upper-cased copies

    return self::strrpos(self::strtoupper($haystack), self::strtoupper($needle), $offset, $encoding, $cleanUtf8);
  }
6244
6245
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Delegates to \mb_strrpos() when mbstring is available, otherwise to
   * \grapheme_strrpos() (UTF-8 only), otherwise to a byte-wise \strrpos()
   * whose result is converted back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <b>null</b> is treated as 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // The default is null, but the backends below expect an int offset;
    // with strict_types a null would be rejected by them, so normalize once here.
    $offset = (int)$offset;

    // a non-negative int needle is treated as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first $offset characters; the skipped amount is added back below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut the last |$offset| characters off; positions stay relative to the start
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = \strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position
    return $offset + self::strlen(\substr($haystack, 0, $pos));
  }
6331
6332
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * When an offset or a length is given, only that part of the string
   * (cut with UTF8::substr()) is examined.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional]
   * @param int    $length [optional]
   *
   * @return int
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the longest leading run of mask characters
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
6359
6360
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Delegates to \mb_strstr() when mbstring is available, otherwise to
   * \grapheme_strstr() (UTF-8 only), otherwise to a regex based lookup.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences from both inputs before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstringSupport = self::$SUPPORT['mbstring'];

    if ($mbstringSupport === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($mbstringSupport === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // regex fallback: group 1 lazily captures everything in front of the first needle
    $parts = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

    if (!isset($parts[1])) {
      return false;
    }

    return $before_needle
        ? $parts[1]
        : self::substr($haystack, self::strlen($parts[1]));
  }
6427
6428
  /**
   * Unicode transformation for case-less matching.
   *
   * Replaces the entries of UTF8::$COMMON_CASE_FOLD, optionally applies the
   * "caseFolding_full" replacement table, optionally cleans the string and
   * finally lower-cases it via UTF8::strtolower(). The lookup tables are
   * cached in static locals after their first use.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str        <p>The input string.</p>
   * @param bool   $full       [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold(string $str, bool $full = true, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = \array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = \array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)\str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the full table is only loaded when it is actually requested
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      $str = (string)\str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6474
6475
  /**
   * Make a string lowercase.
   *
   * Without a language the work is done by \mb_strtolower(). With a language
   * and intl available, the "<lang>-Lower" transliterator is used ("Any-Lower"
   * plus a warning when that id is unknown); with a language but without
   * intl, a warning is raised and \mb_strtolower() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before converting the case
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      // fall back to the language independent transliterator
      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
6529
6530
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * Decomposes the string (NFD) and removes all non-spacing marks (\p{Mn}),
   * so that e.g. accented letters compare like their base letters.
   *
   * If normalization fails, the raw input is used instead; if the mark
   * removal fails, the (normalized) string is returned unchanged. This keeps
   * the declared string return type valid for malformed input.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if (!\is_string($decomposed)) {
      // normalization reported an error: fall back to the raw input
      $decomposed = $str;
    }

    // preg_replace() yields null on error (e.g. invalid UTF-8 with the "u" modifier)
    $withoutMarks = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    return $withoutMarks ?? $decomposed;
  }
6542
6543
  /**
   * Make a string uppercase.
   *
   * Without a language the work is done by \mb_strtoupper(). With a language
   * and intl available, the "<lang>-Upper" transliterator is used ("Any-Upper"
   * plus a warning when that id is unknown); with a language but without
   * intl, a warning is raised and \mb_strtoupper() is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string <p>$str with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null): string
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before converting the case
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language id is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // fall back to the language independent transliterator
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6596
6597
  /**
   * Translate characters or replace sub-strings.
   *
   * With both $from and $to given, each is split via UTF8::str_split(), the
   * longer list is cut to the length of the shorter one and the resulting
   * pairs are applied with PHP's \strtr(). Without $to, a string $from is
   * removed from $str, any other $from is passed to \strtr() as replace-pairs.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // translating something into itself changes nothing
    if ($from === $to) {
      return $str;
    }

    // INF marks "no $to given"
    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only as many pairs as the shorter list provides
      $pairCount = \min(\count($fromChars), \count($toChars));
      $replacePairs = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );

      return \strtr($str, $replacePairs);
    }

    if (\is_string($from)) {
      return \str_replace($from, '', $str);
    }

    return \strtr($str, $from);
  }
6642
6643
  /**
   * Return the width of a string.
   *
   * Thin wrapper around \mb_strwidth(): the encoding name is normalized
   * (unless it is already "UTF-8" or "CP850") and the string is optionally
   * cleaned before the call.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    $isBuiltinEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isBuiltinEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    // delegate to the "mb_"-function (the original notes a polyfill may provide it)
    $width = \mb_strwidth($str, $encoding);

    return $width;
  }
6667
6668
  /**
6669
   * Get part of a string.
6670
   *
6671
   * @link http://php.net/manual/en/function.mb-substr.php
6672
   *
6673
   * @param string $str       <p>The string being checked.</p>
6674
   * @param int    $offset    <p>The first position used in str.</p>
6675
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
6676
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6677
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6678
   *
6679
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6680
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6681
   *                      characters long, <b>FALSE</b> will be returned.</p>
6682
   */
6683 75
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // an empty input or an explicit zero length can only produce an empty string
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding,
      // and their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // no offset and no limit: the whole string is the answer
    if ($offset === 0 && $length === null) {
      return $str;
    }

    // the character count is only needed when an offset is involved
    $charCount = ($offset !== 0 || $length === null) ? self::strlen($str, $encoding) : 0;

    // starting exactly at the end of the string without a limit leaves nothing to return
    // ($length can not be 0 here, that case already returned above)
    if ($length === null && $offset === $charCount) {
      return '';
    }

    // an offset behind the end of the string is impossible
    if ($offset !== 0 && $offset > $charCount) {
      return false;
    }

    // from here on $length always carries a concrete value
    if ($length === null) {
      $length = $charCount;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut byte-wise, as long as mbstring does not overload "substr()"
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return \substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // second choice: intl (the grapheme functions can't handle other encodings)
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      return \grapheme_substr($str, $offset, $length);
    }

    // third choice: iconv ("iconv_substr()" can't handle negative length)
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      $viaIconv = \iconv_substr($str, $offset, $length);
      if ($viaIconv !== false) {
        return $viaIconv;
      }
    }

    // pure ASCII can safely be cut byte-wise
    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    // fallback via vanilla php: split into single characters, take the
    // requested slice and glue the characters back together
    $chars = self::split($str);

    return \implode('', \array_slice($chars, $offset, $length));
  }
6787
6788
  /**
6789
   * Binary safe comparison of two strings from an offset, up to length characters.
6790
   *
6791
   * @param string   $str1               <p>The main string being compared.</p>
6792
   * @param string   $str2               <p>The secondary string being compared.</p>
6793
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
6794
   *                                     counting from the end of the string.</p>
6795
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
6796
   *                                     the length of the str compared to the length of main_str less the offset.</p>
6797
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6798
   *                                     insensitive.</p>
6799
   *
6800
   * @return int <p>
6801
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6802
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6803
   *             <strong>0</strong> if they are equal.
6804
   *             </p>
6805
   */
6806 1
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    // the inputs are only cut down when the caller asked for a window
    $needsWindow = ($offset !== 0 || $length !== null);

    if ($needsWindow) {
      // a failed "substr()" (false) is treated like an empty string
      $left = self::substr($str1, $offset, $length);
      $str1 = ($left === false) ? '' : (string)$left;

      // the second string is shortened to the length of the (already cut) first one
      $right = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($right === false) ? '' : (string)$right;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6832
6833
  /**
6834
   * Count the number of substring occurrences.
6835
   *
6836
   * @link  http://php.net/manual/en/function.substr-count.php
6837
   *
6838
   * @param string $haystack   <p>The string to search in.</p>
6839
   * @param string $needle     <p>The substring to search for.</p>
6840
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
6841
   * @param int    $length     [optional] <p>
6842
   *                           The maximum length after the specified offset to search for the
6843
   *                           substring. It outputs a warning if the offset plus the length is
6844
   *                           greater than the haystack length.
6845
   *                           </p>
6846
   * @param string $encoding   [optional] <p>Set the charset for e.g. "\mb_" function</p>
6847
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
6848
   *
6849
   * @return int|false <p>This function returns an integer, or false e.g. if the haystack or the needle is an empty string.</p>
6850
   */
6851 1
  public static function substr_count(string $haystack, string $needle, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // without a haystack or a needle there is nothing to count
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // reduce the haystack to the requested window first
    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the same encoding that "self::substr()" is called with below
        $length = self::strlen($haystack, $encoding);
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // a failed "substr()" (false) is treated like an empty haystack
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would lead to wrong results in the counting functions below
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle in unicode mode
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
6915
6916
  /**
6917
   * Returns the number of occurrences of $substring in the given string.
6918
   * By default, the comparison is case-sensitive, but can be made insensitive
6919
   * by setting $caseSensitive to false.
6920
   *
6921
   * @param string $str           <p>The input string.</p>
6922
   * @param string $substring     <p>The substring to search for.</p>
6923
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6924
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
6925
   *
6926
   * @return int
6927
   */
6928
  public static function substr_count_simple(string $str, string $substring, bool $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    // case-sensitive: count on the inputs as they are
    if ($caseSensitive) {
      return (int)self::substr_count($str, $substring, 0, null, $encoding);
    }

    // case-insensitive: bring both sides to upper case before counting
    $upperStr = self::strtoupper($str, $encoding);
    $upperSubstring = self::strtoupper($substring, $encoding);

    return (int)self::substr_count($upperStr, $upperSubstring, 0, null, $encoding);
  }
6937
6938
  /**
6939
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6940
   *
6941
   * @param string $haystack <p>The string to search in.</p>
6942
   * @param string $needle   <p>The substring to search for.</p>
6943
   *
6944
   * @return string <p>Return the sub-string.</p>
6945
   */
6946 1 View Code Duplication
  public static function substr_ileft(string $haystack, string $needle): string
  {
    // nothing can be stripped from an empty string
    if (!isset($haystack[0])) {
      return '';
    }

    // an empty prefix, or a prefix that does not match, leaves the input untouched
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix; a failed "substr()" (false) becomes an empty string
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6966
6967
  /**
6968
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6969
   *
6970
   * @param string $haystack <p>The string to search in.</p>
6971
   * @param string $needle   <p>The substring to search for.</p>
6972
   *
6973
   * @return string <p>Return the sub-string.</p>
6974
   */
6975 1 View Code Duplication
  public static function substr_iright(string $haystack, string $needle): string
  {
    // nothing can be stripped from an empty string
    if (!isset($haystack[0])) {
      return '';
    }

    // an empty suffix, or a suffix that does not match, leaves the input untouched
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix; a failed "substr()" (false) becomes an empty string
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6995
6996
  /**
6997
   * Removes a prefix ($needle) from the start of the string ($haystack).
6998
   *
6999
   * @param string $haystack <p>The string to search in.</p>
7000
   * @param string $needle   <p>The substring to search for.</p>
7001
   *
7002
   * @return string <p>Return the sub-string.</p>
7003
   */
7004 1 View Code Duplication
  public static function substr_left(string $haystack, string $needle): string
  {
    // nothing can be stripped from an empty string
    if (!isset($haystack[0])) {
      return '';
    }

    // an empty prefix, or a prefix that does not match, leaves the input untouched
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix; a failed "substr()" (false) becomes an empty string
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
7024
7025
  /**
7026
   * Replace text within a portion of a string.
7027
   *
7028
   * source: https://gist.github.com/stemar/8287074
7029
   *
7030
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
7031
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
7032
   * @param int|int[]       $offset           <p>
7033
   *                                          If start is positive, the replacing will begin at the start'th offset
7034
   *                                          into string.
7035
   *                                          <br><br>
7036
   *                                          If start is negative, the replacing will begin at the start'th character
7037
   *                                          from the end of string.
7038
   *                                          </p>
7039
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
7040
   *                                          portion of string which is to be replaced. If it is negative, it
7041
   *                                          represents the number of characters from the end of string at which to
7042
   *                                          stop replacing. If it is not given, then it will default to strlen(
7043
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
7044
   *                                          length is zero then this function will have the effect of inserting
7045
   *                                          replacement into string at the given start offset.</p>
7046
   *
7047
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
7048
   */
7049 7
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // array input: normalize every argument to one entry per input string,
    // then let this method handle the entries one by one
    if (\is_array($str) === true) {
      $count = \count($str);

      // the replacement: cut an array down, or repeat a single value
      $replacement = (\is_array($replacement) === true)
          ? \array_slice($replacement, 0, $count)
          : \array_pad([$replacement], $count, $replacement);

      // the offset: entries which are not integers fall back to 0
      if (\is_array($offset) === true) {
        $offset = \array_map(
            static function ($candidate) {
              return (int)$candidate === $candidate ? $candidate : 0;
            },
            \array_slice($offset, 0, $count)
        );
      } else {
        $offset = \array_pad([$offset], $count, $offset);
      }

      // the length: no length means 0 for every entry; inside of an array
      // null becomes 0 and every other non-integer becomes the number of input strings
      if ($length === null) {
        $length = \array_fill(0, $count, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_map(
            static function ($candidate) use ($count) {
              if ($candidate === null) {
                return 0;
              }

              return (int)$candidate === $candidate ? $candidate : $count;
            },
            \array_slice($length, 0, $count)
        );
      } else {
        $length = \array_pad([$length], $count, $length);
      }

      // recursive call, once per input string
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    // a single input string only takes the first replacement of an array
    if (\is_array($replacement) === true) {
      $replacement = (\count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    // an empty input is replaced as a whole
    if (!isset($str[0])) {
      return $replacement;
    }

    // a pure ASCII input can be handled by the native byte-wise function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return \substr_replace($str, $replacement, $offset);
      }

      return \substr_replace($str, $replacement, $offset, $length);
    }

    // otherwise: split both strings into characters and splice the character lists
    \preg_match_all('/./us', $str, $strChars);
    \preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = self::strlen($str);
    }

    \array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return \implode('', $strChars[0]);
  }
7126
7127
  /**
7128
   * Removes a suffix ($needle) from the end of the string ($haystack).
7129
   *
7130
   * @param string $haystack <p>The string to search in.</p>
7131
   * @param string $needle   <p>The substring to search for.</p>
7132
   *
7133
   * @return string <p>Return the sub-string.</p>
7134
   */
7135 1 View Code Duplication
  public static function substr_right(string $haystack, string $needle): string
  {
    // nothing can be stripped from an empty string
    if (!isset($haystack[0])) {
      return '';
    }

    // an empty suffix, or a suffix that does not match, leaves the input untouched
    if (!isset($needle[0]) || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix; a failed "substr()" (false) becomes an empty string
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
7155
7156
  /**
7157
   * Returns a case swapped version of the string.
7158
   *
7159
   * @param string $str       <p>The input string.</p>
7160
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
7161
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7162
   *
7163
   * @return string <p>Each character's case swapped.</p>
7164
   */
7165 1
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // work on a cleaned copy of the input, if the caller asked for it
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: bytes which are equal in both case variants cancel each other out
    // and leave the input byte, all other bytes end up with the opposite variant
    return (string)($lower ^ $upper ^ $str);
  }
7183
7184
  /**
7185
   * @param string $str
7186
   * @param int    $tabLength
7187
   *
7188
   * @return string
7189
   */
7190
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    // every tab is replaced by a run of $tabLength space characters
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
7194
7195
  /**
7196
   * Converts the first character of each word in the string to uppercase
7197
   * and all other chars to lowercase.
7198
   *
7199
   * @param string $str      <p>The input string.</p>
7200
   * @param string $encoding [optional] <p>Set the charset for e.g. "\mb_" function</p>
7201
   *
7202
   * @return string <p>String with all characters of $str being title-cased.</p>
7203
   */
7204
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    // the conversion is delegated to "mb_convert_case()" in title-case mode
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
7209
7210
  /**
7211
   * alias for "UTF8::to_ascii()"
7212
   *
7213
   * @see        UTF8::to_ascii()
7214
   *
7215
   * @param string $str
7216
   * @param string $subst_chr
7217
   * @param bool   $strict
7218
   *
7219
   * @return string
7220
   *
7221
   * @deprecated <p>use "UTF8::to_ascii()"</p>
7222
   */
7223 7
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    // deprecated alias: all arguments are handed over unchanged to "UTF8::to_ascii()"
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
7227
7228
  /**
7229
   * alias for "UTF8::to_iso8859()"
7230
   *
7231
   * @see        UTF8::to_iso8859()
7232
   *
7233
   * @param string|string[] $str
7234
   *
7235
   * @return string|string[]
7236
   *
7237
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
7238
   */
7239 1
  public static function toIso8859($str)
  {
    // deprecated alias: the argument is handed over unchanged to "UTF8::to_iso8859()"
    $converted = self::to_iso8859($str);

    return $converted;
  }
7243
7244
  /**
7245
   * alias for "UTF8::to_latin1()"
7246
   *
7247
   * @see        UTF8::to_latin1()
7248
   *
7249
   * @param string|string[] $str
7250
   *
7251
   * @return string|string[]
7252
   *
7253
   * @deprecated <p>use "UTF8::to_latin1()"</p>
7254
   */
7255 1
  public static function toLatin1($str)
  {
    // deprecated alias: the argument is handed over unchanged to "UTF8::to_latin1()"
    $converted = self::to_latin1($str);

    return $converted;
  }
7259
7260
  /**
7261
   * alias for "UTF8::to_utf8()"
7262
   *
7263
   * @see        UTF8::to_utf8()
7264
   *
7265
   * @param string|string[] $str
7266
   *
7267
   * @return string|string[]
7268
   *
7269
   * @deprecated <p>use "UTF8::to_utf8()"</p>
7270
   */
7271 1
  public static function toUTF8($str)
  {
    // deprecated alias: the argument is handed over unchanged to "UTF8::to_utf8()"
    $converted = self::to_utf8($str);

    return $converted;
  }
7275
7276
  /**
7277
   * Convert a string into ASCII.
7278
   *
7279
   * @param string $str     <p>The input string.</p>
7280
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
7281
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
7282
   *                        performance</p>
7283
   *
7284
   * @return string
7285
   */
7286 21
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // transliteration tables, loaded on demand and kept across calls;
    // they are keyed by "bank" (= code point >> 8)
    static $UTF8_TO_ASCII;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: in that case
        // the input is kept and handled by the table based fallback below
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // reset for every character, so that a character which sets no code point
      // below can never re-use the code point of the previous character
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // decode the code point from the lead byte and its continuation bytes:
      // 2 bytes (lead byte 192-223)
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3 bytes (lead byte 224-239)
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4 bytes (lead byte 240-247)
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5 bytes (lead byte 248-251)
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6 bytes (lead byte 252-253)
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 254 and 255 are no lead bytes which are decoded above
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the table for this bank on first use; a missing table is
      // remembered as an empty array, so it is requested only once
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      // position of the character inside of its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // there is no replacement for this character in the table
        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return \implode('', $chars);
  }
7437
7438
  /**
7439
   * @param string $str
7440
   *
7441
   * @return bool
7442
   */
7443
  public static function to_boolean(string $str): bool
  {
    // the well-known words are matched case-insensitively
    $key = \strtolower($str);

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $map = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    if (isset($map[$key])) {
      return $map[$key];
    }

    // numeric strings: only values greater than zero are true
    // ($str is always a string here, so a strict comparison of the input
    // with its own int / float cast could never match)
    if (\is_numeric($str)) {
      return (float)$str > 0;
    }

    // everything else: true, if something is left after trimming
    return (bool)self::trim($str);
  }
7470
7471
  /**
7472
   * Convert a string into "ISO-8859"-encoding (Latin-1).
7473
   *
7474
   * @param string|string[] $str
7475
   *
7476
   * @return string|string[]
7477
   */
7478 3
  public static function to_iso8859($str)
  {
    // arrays are converted entry by entry (the keys are kept)
    if (\is_array($str) === true) {
      return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string)$str;

    // an empty string needs no conversion
    return isset($str[0]) ? self::utf8_decode($str) : '';
  }
7495
7496
  /**
7497
   * alias for "UTF8::to_iso8859()"
7498
   *
7499
   * @see UTF8::to_iso8859()
7500
   *
7501
   * @param string|string[] $str
7502
   *
7503
   * @return string|string[]
7504
   */
7505 1
  public static function to_latin1($str)
  {
    // alias: the argument is handed over unchanged to "UTF8::to_iso8859()"
    $converted = self::to_iso8859($str);

    return $converted;
  }
7509
7510
  /**
7511
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
7512
   *
7513
   * <ul>
7514
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
7515
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
7516
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
7517
   * case.</li>
7518
   * </ul>
7519
   *
7520
   * @param string|string[] $str                    <p>Any string or array.</p>
7521
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
7522
   *
7523
   * @return string|string[] <p>The UTF-8 encoded string.</p>
7524
   */
7525 22
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
7526
  {
7527 22 View Code Duplication
    if (\is_array($str) === true) {
7528 2
      foreach ($str as $k => $v) {
7529 2
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
7530
      }
7531
7532 2
      return $str;
7533
    }
7534
7535 22
    $str = (string)$str;
7536 22
    if (!isset($str[0])) {
7537 3
      return $str;
7538
    }
7539
7540 22
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
7541
      self::checkForSupport();
7542
    }
7543
7544 22
    $max = self::strlen_in_byte($str);
7545 22
    $buf = '';
7546
7547
    /** @noinspection ForeachInvariantsInspection */
7548 22
    for ($i = 0; $i < $max; $i++) {
7549 22
      $c1 = $str[$i];
7550
7551 22
      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
7552
7553 22
        if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
7554
7555 20
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
7556
7557 20
          if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
7558 15
            $buf .= $c1 . $c2;
7559 15
            $i++;
7560
          } else { // not valid UTF8 - convert it
7561 20
            $buf .= self::to_utf8_convert($c1);
7562
          }
7563
7564 21
        } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
7565
7566 20
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
7567 20
          $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
7568
7569 20
          if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
7570 12
            $buf .= $c1 . $c2 . $c3;
7571 12
            $i += 2;
7572
          } else { // not valid UTF8 - convert it
7573 20
            $buf .= self::to_utf8_convert($c1);
7574
          }
7575
7576 14
        } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
7577
7578 14
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
7579 14
          $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
7580 14
          $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
7581
7582 14
          if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
7583 5
            $buf .= $c1 . $c2 . $c3 . $c4;
7584 5
            $i += 3;
7585
          } else { // not valid UTF8 - convert it
7586 14
            $buf .= self::to_utf8_convert($c1);
7587
          }
7588
7589
        } else { // doesn't look like UTF8, but should be converted
7590 22
          $buf .= self::to_utf8_convert($c1);
7591
        }
7592
7593 20
      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
7594
7595 2
        $buf .= self::to_utf8_convert($c1);
7596
7597
      } else { // it doesn't need conversion
7598 20
        $buf .= $c1;
7599
      }
7600
    }
7601
7602
    // decode unicode escape sequences
7603 22
    $buf = \preg_replace_callback(
7604 22
        '/\\\\u([0-9a-f]{4})/i',
7605 22
        function ($match) {
7606 4
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
7607 22
        },
7608 22
        $buf
7609
    );
7610
7611
    // decode UTF-8 codepoints
7612 22
    if ($decodeHtmlEntityToUtf8 === true) {
7613 1
      $buf = self::html_entity_decode($buf);
7614
    }
7615
7616 22
    return $buf;
7617
  }
7618
7619
  /**
   * Convert one non-UTF-8 byte into its UTF-8 representation.
   *
   * The byte is first looked up in the WINDOWS-1252 special-case table; if it is not
   * listed there, it is encoded as a regular 2-byte UTF-8 sequence.
   *
   * @param string $int <p>
   *                    The byte to convert, as a one-byte string: the value is used as a key
   *                    of the "ord" table and in string bit-operations (the parameter name is
   *                    historical).
   *                    </p>
   *
   * @return string <p>The UTF-8 byte sequence for the given byte.</p>
   */
  private static function to_utf8_convert($int): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Use a bit shift instead of "/ 64": the division yields a float for every
    // value that is not a multiple of 64, and a fractional float as array key is
    // truncated implicitly (deprecated since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
7652
7653
  /**
   * Strip whitespace (or other characters) from the beginning and the end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but checking
   * for that costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>
   *                      Characters to be stripped. With the default (INF) or any falsy value,
   *                      the Unicode separator (\pZ) and "other" (\pC) categories are stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    $useDefaultClass = ($chars === INF || !$chars);

    if ($useDefaultClass) {
      $characterClass = '\pZ\pC';
    } else {
      // the custom characters end up inside a character class of a "/"-delimited pattern
      $characterClass = \preg_quote($chars, '/');
    }

    $pattern = '^[' . $characterClass . ']+|[' . $characterClass . ']+$';

    return self::regexReplace($str, $pattern, '', '', '/');
  }
7682
7683
  /**
   * Make the first character of a string uppercase.
   *
   * The string is split into its first character and the rest; only the first
   * character is passed to "UTF8::strtoupper()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character, "false" from substr means: there is no rest
    $rest = self::substr($str, 1, null, $encoding);
    $rest = $rest === false ? '' : $rest;

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $firstCharUpper . $rest;
  }
7713
7714
  /**
   * Alias of "UTF8::ucfirst()": makes the first character of the given word uppercase.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word      <p>The input string.</p>
   * @param string $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string <p>Whatever "UTF8::ucfirst()" returns for the given input.</p>
   */
  public static function ucword(string $word, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7729
7730
  /**
   * Uppercase the first character of all words in the string.
   *
   * Pure ASCII input without exceptions and without an extra charlist is delegated to the
   * native "\ucwords()". Everything else is split via "UTF8::str_to_words()", each chunk
   * that is not listed in $exceptions is passed to "UTF8::ucfirst()", and the chunks are
   * glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against the empty string explicitly: a plain truthiness check
    // would also treat the valid input "0" as empty
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function knows neither the charlist nor the exceptions
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = [];
    $useExceptions = \count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty chunks only, a chunk like "0" must survive
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
7798
7799
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each pass runs the string through "UTF8::to_utf8()", "UTF8::html_entity_decode()",
   * the native "\urldecode()" and "UTF8::fix_simple_utf8()", in that order.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until a pass changes nothing).</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html-entities, so that they get decoded below
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // one pass only, or nothing left to decode
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return $str;
  }
7847
7848
  /**
7849
   * Returns an array that maps urlencoded WINDOWS-1252 sequences ("%XX") to their UTF-8 characters.
7850
   *
7851
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
7852
   *
7853
   * @return string[]
7854
   */
7855 1
  public static function urldecode_fix_win1252_chars(): array
7856
  {
7857
    return [
7858 1
        '%20' => ' ',
7859
        '%21' => '!',
7860
        '%22' => '"',
7861
        '%23' => '#',
7862
        '%24' => '$',
7863
        '%25' => '%',
7864
        '%26' => '&',
7865
        '%27' => "'",
7866
        '%28' => '(',
7867
        '%29' => ')',
7868
        '%2A' => '*',
7869
        '%2B' => '+',
7870
        '%2C' => ',',
7871
        '%2D' => '-',
7872
        '%2E' => '.',
7873
        '%2F' => '/',
7874
        '%30' => '0',
7875
        '%31' => '1',
7876
        '%32' => '2',
7877
        '%33' => '3',
7878
        '%34' => '4',
7879
        '%35' => '5',
7880
        '%36' => '6',
7881
        '%37' => '7',
7882
        '%38' => '8',
7883
        '%39' => '9',
7884
        '%3A' => ':',
7885
        '%3B' => ';',
7886
        '%3C' => '<',
7887
        '%3D' => '=',
7888
        '%3E' => '>',
7889
        '%3F' => '?',
7890
        '%40' => '@',
7891
        '%41' => 'A',
7892
        '%42' => 'B',
7893
        '%43' => 'C',
7894
        '%44' => 'D',
7895
        '%45' => 'E',
7896
        '%46' => 'F',
7897
        '%47' => 'G',
7898
        '%48' => 'H',
7899
        '%49' => 'I',
7900
        '%4A' => 'J',
7901
        '%4B' => 'K',
7902
        '%4C' => 'L',
7903
        '%4D' => 'M',
7904
        '%4E' => 'N',
7905
        '%4F' => 'O',
7906
        '%50' => 'P',
7907
        '%51' => 'Q',
7908
        '%52' => 'R',
7909
        '%53' => 'S',
7910
        '%54' => 'T',
7911
        '%55' => 'U',
7912
        '%56' => 'V',
7913
        '%57' => 'W',
7914
        '%58' => 'X',
7915
        '%59' => 'Y',
7916
        '%5A' => 'Z',
7917
        '%5B' => '[',
7918
        '%5C' => '\\',
7919
        '%5D' => ']',
7920
        '%5E' => '^',
7921
        '%5F' => '_',
7922
        '%60' => '`',
7923
        '%61' => 'a',
7924
        '%62' => 'b',
7925
        '%63' => 'c',
7926
        '%64' => 'd',
7927
        '%65' => 'e',
7928
        '%66' => 'f',
7929
        '%67' => 'g',
7930
        '%68' => 'h',
7931
        '%69' => 'i',
7932
        '%6A' => 'j',
7933
        '%6B' => 'k',
7934
        '%6C' => 'l',
7935
        '%6D' => 'm',
7936
        '%6E' => 'n',
7937
        '%6F' => 'o',
7938
        '%70' => 'p',
7939
        '%71' => 'q',
7940
        '%72' => 'r',
7941
        '%73' => 's',
7942
        '%74' => 't',
7943
        '%75' => 'u',
7944
        '%76' => 'v',
7945
        '%77' => 'w',
7946
        '%78' => 'x',
7947
        '%79' => 'y',
7948
        '%7A' => 'z',
7949
        '%7B' => '{',
7950
        '%7C' => '|',
7951
        '%7D' => '}',
7952
        '%7E' => '~',
7953
        '%7F' => '',
7954
        '%80' => '`',
7955
        '%81' => '',
7956
        '%82' => '‚',
7957
        '%83' => 'ƒ',
7958
        '%84' => '„',
7959
        '%85' => '…',
7960
        '%86' => '†',
7961
        '%87' => '‡',
7962
        '%88' => 'ˆ',
7963
        '%89' => '‰',
7964
        '%8A' => 'Š',
7965
        '%8B' => '‹',
7966
        '%8C' => 'Œ',
7967
        '%8D' => '',
7968
        '%8E' => 'Ž',
7969
        '%8F' => '',
7970
        '%90' => '',
7971
        '%91' => '‘',
7972
        '%92' => '’',
7973
        '%93' => '“',
7974
        '%94' => '”',
7975
        '%95' => '•',
7976
        '%96' => '–',
7977
        '%97' => '—',
7978
        '%98' => '˜',
7979
        '%99' => '™',
7980
        '%9A' => 'š',
7981
        '%9B' => '›',
7982
        '%9C' => 'œ',
7983
        '%9D' => '',
7984
        '%9E' => 'ž',
7985
        '%9F' => 'Ÿ',
7986
        '%A0' => '',
7987
        '%A1' => '¡',
7988
        '%A2' => '¢',
7989
        '%A3' => '£',
7990
        '%A4' => '¤',
7991
        '%A5' => '¥',
7992
        '%A6' => '¦',
7993
        '%A7' => '§',
7994
        '%A8' => '¨',
7995
        '%A9' => '©',
7996
        '%AA' => 'ª',
7997
        '%AB' => '«',
7998
        '%AC' => '¬',
7999
        '%AD' => '',
8000
        '%AE' => '®',
8001
        '%AF' => '¯',
8002
        '%B0' => '°',
8003
        '%B1' => '±',
8004
        '%B2' => '²',
8005
        '%B3' => '³',
8006
        '%B4' => '´',
8007
        '%B5' => 'µ',
8008
        '%B6' => '¶',
8009
        '%B7' => '·',
8010
        '%B8' => '¸',
8011
        '%B9' => '¹',
8012
        '%BA' => 'º',
8013
        '%BB' => '»',
8014
        '%BC' => '¼',
8015
        '%BD' => '½',
8016
        '%BE' => '¾',
8017
        '%BF' => '¿',
8018
        '%C0' => 'À',
8019
        '%C1' => 'Á',
8020
        '%C2' => 'Â',
8021
        '%C3' => 'Ã',
8022
        '%C4' => 'Ä',
8023
        '%C5' => 'Å',
8024
        '%C6' => 'Æ',
8025
        '%C7' => 'Ç',
8026
        '%C8' => 'È',
8027
        '%C9' => 'É',
8028
        '%CA' => 'Ê',
8029
        '%CB' => 'Ë',
8030
        '%CC' => 'Ì',
8031
        '%CD' => 'Í',
8032
        '%CE' => 'Î',
8033
        '%CF' => 'Ï',
8034
        '%D0' => 'Ð',
8035
        '%D1' => 'Ñ',
8036
        '%D2' => 'Ò',
8037
        '%D3' => 'Ó',
8038
        '%D4' => 'Ô',
8039
        '%D5' => 'Õ',
8040
        '%D6' => 'Ö',
8041
        '%D7' => '×',
8042
        '%D8' => 'Ø',
8043
        '%D9' => 'Ù',
8044
        '%DA' => 'Ú',
8045
        '%DB' => 'Û',
8046
        '%DC' => 'Ü',
8047
        '%DD' => 'Ý',
8048
        '%DE' => 'Þ',
8049
        '%DF' => 'ß',
8050
        '%E0' => 'à',
8051
        '%E1' => 'á',
8052
        '%E2' => 'â',
8053
        '%E3' => 'ã',
8054
        '%E4' => 'ä',
8055
        '%E5' => 'å',
8056
        '%E6' => 'æ',
8057
        '%E7' => 'ç',
8058
        '%E8' => 'è',
8059
        '%E9' => 'é',
8060
        '%EA' => 'ê',
8061
        '%EB' => 'ë',
8062
        '%EC' => 'ì',
8063
        '%ED' => 'í',
8064
        '%EE' => 'î',
8065
        '%EF' => 'ï',
8066
        '%F0' => 'ð',
8067
        '%F1' => 'ñ',
8068
        '%F2' => 'ò',
8069
        '%F3' => 'ó',
8070
        '%F4' => 'ô',
8071
        '%F5' => 'õ',
8072
        '%F6' => 'ö',
8073
        '%F7' => '÷',
8074
        '%F8' => 'ø',
8075
        '%F9' => 'ù',
8076
        '%FA' => 'ú',
8077
        '%FB' => 'û',
8078
        '%FC' => 'ü',
8079
        '%FD' => 'ý',
8080
        '%FE' => 'þ',
8081
        '%FF' => 'ÿ',
8082
    ];
8083
  }
8084
8085
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is rewritten in place, byte by byte: 2-byte sequences become the single
   * byte with the same code point (or "?" if the code point is above 255), 3- and 4-byte
   * sequences become "?", and all other bytes are copied unchanged.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the string as it was before the byte-wise decoding is
   *                              returned whenever the decoded result is not shorter than it
   *                              (as measured by "UTF8::strlen()").
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
  {
    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): this replaces the KEYS of the win1252-table with its VALUES; "to_utf8_convert()"
    // accesses the same table by byte value, so the keys are presumably integers and get searched
    // as decimal text here - confirm against the data file before relying on this step.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position ($j never overtakes $i)
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // 2-byte sequence: 5 bits from the lead byte, 6 bits from the continuation byte
          $highBits = self::$ORD[$str[$i] & "\x1F"] << 6;
          ++$i;
          // A truncated sequence at the very end has no continuation byte: do not read
          // past the end of the string, use zero bits instead (presumably the same value
          // the out-of-range read produced before, alongside a warning - verify).
          $lowBits = $i < $len ? self::$ORD[$str[$i] & "\x3F"] : 0;
          $c = $highBits | $lowBits;
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        case "\xF0":
          // 4-byte sequence: not representable, skip the 3 continuation bytes
          $str[$j] = $noCharFound;
          $i += 3;
          break;

        case "\xE0":
          // 3-byte sequence: not representable, skip the 2 continuation bytes
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes belong to the result, the rest is left over from the input
    $return = (string)self::substr($str, 0, $j, 'CP850'); // 8-BIT

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
8166
8167
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The input is passed to the global "\utf8_encode()" first. Only if the result contains
   * the byte 0xC2, the keys of the win1252-table are replaced by its values afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string; an empty string for empty input or if the encoding failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;
    if (\strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // build the search / replace lists once per request
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
8207
8208
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
8221
8222
  /**
   * Returns the class-internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[] <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
8238
8239
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string holds more words than
   * allowed, it is cut after the last allowed word, right-trimmed, and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if (!isset($str[0]) || $limit < 1) {
      return '';
    }

    // leading whitespace plus at most $limit words (each with its trailing whitespace)
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);

    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    // no match at all, or the match already covers the whole string
    return $str;
  }
8270
8271
  /**
   * Wraps a string to a given number of characters.
   *
   * The wrapping itself is done by the native "\wordwrap()", but on a one-byte-per-character
   * "shadow" of the input: every character becomes "?", every space stays " " and every
   * existing break becomes "#". The break positions found in the wrapped shadow are then
   * applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // build the list of real characters and its shadow side by side
    $shadow = '';
    $chars = [];

    foreach (\explode($break, $str) as $segmentIndex => $segment) {

      // every segment but the first one was preceded by a break in the input
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $shadow .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $shadow .= ' ' === $char ? ' ' : '?';
      }
    }

    $shadow = \wordwrap($shadow, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;   // next unconsumed entry of $chars
    $shadowPos = -1;  // position in the wrapped shadow that was handled last
    $breakPos = -1;   // position of the current "#" in the wrapped shadow

    while (true) {
      $breakPos = self::strpos($shadow, '#', $breakPos + 1);
      if ($breakPos === false) {
        break;
      }

      // copy the real characters that sit in front of this break
      ++$shadowPos;
      while ($shadowPos < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$shadowPos;
      }

      // an original break or a space at the break position is replaced by the new break
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever was not consumed belongs to the last line
    return $wrapped . \implode('', $chars);
  }
8335
8336
  /**
   * Returns the class-internal list of Unicode White Space characters.
   *
   * @return string[] <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8345
8346
}
8347