Completed
Push — master ( 20da7f...f678bd )
by Lars
05:01
created

UTF8::fix_simple_utf8()   B

Complexity

Conditions 4
Paths 4

Size

Total Lines 24
Code Lines 12

Duplication

Lines 24
Ratio 100 %

Code Coverage

Tests 14
CRAP Score 4

Importance

Changes 0
Metric Value
dl 24
loc 24
ccs 14
cts 14
cp 1
rs 8.6845
c 0
b 0
f 0
cc 4
eloc 12
nc 4
nop 1
crap 4
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
   * Byte-order-mark lookup table: BOM byte sequence => its length in bytes.
   *
   * Every BOM is listed twice: once as the raw byte sequence and once as the
   * UTF-8 text you get when those raw bytes were wrongly read as "WINDOWS-1252"
   * (each such character takes two bytes in UTF-8, hence the doubled lengths).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // written as explicit bytes ("ï»¿" in UTF-8) so the key survives any re-encoding of this source file
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two blank characters in the next key (and in the LE variant below)
      // presumably stand for NUL bytes - confirm against the upstream source
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
35
36
  /**
37
   * Numeric code point => UTF-8 Character
38
   *
39
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
40
   *
41
   * @var array
42
   */
43
  private static $WHITESPACE = array(
44
    // NUL Byte
45
    0     => "\x0",
46
    // Tab
47
    9     => "\x9",
48
    // New Line
49
    10    => "\xa",
50
    // Vertical Tab
51
    11    => "\xb",
52
    // Carriage Return
53
    13    => "\xd",
54
    // Ordinary Space
55
    32    => "\x20",
56
    // NO-BREAK SPACE
57
    160   => "\xc2\xa0",
58
    // OGHAM SPACE MARK
59
    5760  => "\xe1\x9a\x80",
60
    // MONGOLIAN VOWEL SEPARATOR
61
    6158  => "\xe1\xa0\x8e",
62
    // EN QUAD
63
    8192  => "\xe2\x80\x80",
64
    // EM QUAD
65
    8193  => "\xe2\x80\x81",
66
    // EN SPACE
67
    8194  => "\xe2\x80\x82",
68
    // EM SPACE
69
    8195  => "\xe2\x80\x83",
70
    // THREE-PER-EM SPACE
71
    8196  => "\xe2\x80\x84",
72
    // FOUR-PER-EM SPACE
73
    8197  => "\xe2\x80\x85",
74
    // SIX-PER-EM SPACE
75
    8198  => "\xe2\x80\x86",
76
    // FIGURE SPACE
77
    8199  => "\xe2\x80\x87",
78
    // PUNCTUATION SPACE
79
    8200  => "\xe2\x80\x88",
80
    // THIN SPACE
81
    8201  => "\xe2\x80\x89",
82
    //HAIR SPACE
83
    8202  => "\xe2\x80\x8a",
84
    // LINE SEPARATOR
85
    8232  => "\xe2\x80\xa8",
86
    // PARAGRAPH SEPARATOR
87
    8233  => "\xe2\x80\xa9",
88
    // NARROW NO-BREAK SPACE
89
    8239  => "\xe2\x80\xaf",
90
    // MEDIUM MATHEMATICAL SPACE
91
    8287  => "\xe2\x81\x9f",
92
    // IDEOGRAPHIC SPACE
93
    12288 => "\xe3\x80\x80",
94
  );
95
96
  /**
97
   * @var array
98
   */
99
  private static $WHITESPACE_TABLE = array(
100
      'SPACE'                     => "\x20",
101
      'NO-BREAK SPACE'            => "\xc2\xa0",
102
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
103
      'EN QUAD'                   => "\xe2\x80\x80",
104
      'EM QUAD'                   => "\xe2\x80\x81",
105
      'EN SPACE'                  => "\xe2\x80\x82",
106
      'EM SPACE'                  => "\xe2\x80\x83",
107
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
108
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
109
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
110
      'FIGURE SPACE'              => "\xe2\x80\x87",
111
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
112
      'THIN SPACE'                => "\xe2\x80\x89",
113
      'HAIR SPACE'                => "\xe2\x80\x8a",
114
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
115
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
116
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
117
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
118
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
119
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
120
  );
121
122
  /**
123
   * bidirectional text chars
124
   *
125
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
126
   *
127
   * @var array
128
   */
129
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
130
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
131
    8234 => "\xE2\x80\xAA",
132
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
133
    8235 => "\xE2\x80\xAB",
134
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
135
    8236 => "\xE2\x80\xAC",
136
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
137
    8237 => "\xE2\x80\xAD",
138
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
139
    8238 => "\xE2\x80\xAE",
140
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
141
    8294 => "\xE2\x81\xA6",
142
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
143
    8295 => "\xE2\x81\xA7",
144
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
145
    8296 => "\xE2\x81\xA8",
146
    // POP DIRECTIONAL ISOLATE
147
    8297 => "\xE2\x81\xA9",
148
  );
149
150
  /**
151
   * @var array
152
   */
153
  private static $COMMON_CASE_FOLD = array(
154
      'ſ'            => 's',
155
      "\xCD\x85"     => 'ι',
156
      'ς'            => 'σ',
157
      "\xCF\x90"     => 'β',
158
      "\xCF\x91"     => 'θ',
159
      "\xCF\x95"     => 'φ',
160
      "\xCF\x96"     => 'π',
161
      "\xCF\xB0"     => 'κ',
162
      "\xCF\xB1"     => 'ρ',
163
      "\xCF\xB5"     => 'ε',
164
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
165
      "\xE1\xBE\xBE" => 'ι',
166
  );
167
168
  /**
169
   * @var array
170
   */
171
  private static $SUPPORT = array();
172
173
  /**
174
   * @var null|array
175
   */
176
  private static $UTF8_MSWORD = null;
177
178
  /**
179
   * @var null|array
180
   */
181
  private static $BROKEN_UTF8_FIX = null;
182
183
  /**
184
   * @var null|array
185
   */
186
  private static $WIN1252_TO_UTF8 = null;
187
188
  /**
189
   * @var null|array
190
   */
191
  private static $ENCODINGS = null;
192
193
  /**
194
   * @var null|array
195
   */
196
  private static $ORD = null;
197
198
  /**
199
   * @var null|array
200
   */
201
  private static $CHR = null;
202
203
  /**
   * Creates an instance and makes sure the environment probe has been executed.
   *
   * The actual work is delegated to UTF8::checkForSupport(), which only fills
   * the feature flags the first time it is called.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
210
211
  /**
   * Fetches one character of a string by its character position ($str[1]-like access).
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the wanted character.</p>
   *
   * @return string <p>The single (multi-byte) character, or an empty string when
   *                the input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $pos = (int)$pos;

    // negative positions are rejected instead of being counted from the end
    return $pos < 0 ? '' : (string)self::substr($str, $pos, 1);
  }
235
236
  /**
   * Makes sure a string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input string, prefixed with the UTF-8 BOM if it had none.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already carries a BOM -> hand it back untouched
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
253
254
  /**
   * Converts a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are converted to hexadecimal via base_convert() and then packed
   * with pack('H*'). The hex string is left-padded to an even number of digits
   * first, because pack('H*') fills a missing nibble at the END of the string,
   * which would turn e.g. "1010" (0x0a) into the byte 0xa0.
   *
   * NOTE(review): base_convert() is documented to lose precision on large
   * numbers, so very long digit strings may not round-trip - confirm the
   * expected input size with the callers.
   *
   * @param mixed $bin <p>String consisting of the characters 1|0.</p>
   *
   * @return string <p>The packed bytes; an empty string for empty input or when the value is zero.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    $convert = base_convert($bin, 2, 16);
    if ($convert === '0') {
      return '';
    }

    // pack('H*') expects whole bytes (two hex digits each), high nibble first
    if (strlen($convert) % 2 !== 0) {
      $convert = '0' . $convert;
    }

    return pack('H*', $convert);
  }
274
275
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
286
287
  /**
   * Alias of UTF8::chr_map(): runs a callback on every character of a string.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback to apply.</p>
   * @param string       $str      <p>The string whose characters are passed to the callback.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns for the same arguments.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
301
302
  /**
   * Probes the server environment once and stores the results in self::$SUPPORT.
   *
   * Recorded flags: "mbstring", "mbstring_func_overload", "iconv", "intl",
   * "intl__transliterator_list_ids", "intlChar" and "pcre_utf8". Once the marker
   * "already_checked_via_portable_utf8" is set, further calls return immediately.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the probe already ran - nothing left to do
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator id list is only fetched when intl is present and offers the function
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      self::$SUPPORT['intl__transliterator_list_ids'] = transliterator_list_ids();
    } else {
      self::$SUPPORT['intl__transliterator_list_ids'] = array();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
338
339
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * How the character is produced:
   * - code points up to 127 are read from the self::$CHR lookup table,
   * - otherwise \IntlChar::chr() is used when the "intlChar" support flag is set,
   * - otherwise the 2-, 3- or 4-byte UTF-8 sequence is assembled from self::$CHR.
   * For any target encoding other than UTF-8 the result is then converted with
   * \mb_convert_encoding(). Results are memoized per code point + encoding.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // function-static memo of characters that were already generated
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      // single-byte code points were handled above, so only the
      // multi-byte UTF-8 layouts remain here
      if ($code_point <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
428
429
  /**
   * Splits a string into its characters and runs a callback on each of them.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
443
444
  /**
   * Lists the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>Byte lengths in character order; an empty array for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return array();
    }

    $byteLengths = array();
    foreach (self::split($str) as $index => $character) {
      // '8BIT' makes strlen() count raw bytes instead of characters
      $byteLengths[$index] = self::strlen($character, '8BIT');
    }

    return $byteLengths;
  }
471
472
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character (1 to 4); the
   * payload bits of the lead byte and of the following continuation bytes are
   * then concatenated into the code point.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code point; 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // guard like the sibling methods do: reading $char[0] of an empty string
    // would raise an "uninitialized string offset" error
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: append the six payload bits of each continuation byte
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
511
512
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed to UTF8::int_to_hex(), "U+" by default.</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;
    if ($char === '') {
      return '';
    }

    // the literal entity "&#0;" is looked up as an empty string
    $lookup = $char === '&#0;' ? '' : $char;

    return self::int_to_hex(self::ord($lookup), $pfix);
  }
534
535
  /**
   * Alias of UTF8::chr_to_decimal().
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the same character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
548
549
  /**
   * Breaks a string into chunks and joins them with the given line ending.
   *
   * The chunks come from UTF8::split($body, $chunklen); $end is placed between
   * the chunks only (the result of implode()).
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
562
563
  /**
   * Strips everything that is not valid UTF-8 from a string, plus optional extras.
   *
   * Always applied: the byte-level filter below, UTF8::replace_diamond_question_mark()
   * with an empty replacement and UTF8::remove_invisible_characters(). The
   * remaining steps run only when the matching flag is exactly true.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences; stray bytes land in
    // group 2 or 3 and vanish because only "$1" is written back
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($validSequences, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // optional steps, in this fixed order: whitespace, MS Word chars, BOM
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
611
612
  /**
   * Cleans up a string so that only printable UTF-8 chars remain, and fixes the UTF-8 encoding.
   *
   * Runs UTF8::fix_simple_utf8() first and then UTF8::clean() with BOM removal
   * and whitespace normalization switched on, MS Word normalization switched
   * off and non-breaking spaces kept.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($str);

    // remove all non UTF-8 symbols
    // && remove diamond question mark (�)
    // && remove invisible characters (e.g. "\0")
    // && remove BOM
    // && normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($repaired, true, true, false, true);
  }
639
640
  /**
   * Turns a string (or an array of strings) into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string argument is first split into its characters; every element is
   * then passed to UTF8::ord() and, if requested, to UTF8::int_to_hex().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) === true ? self::split($arg) : $arg;

    $points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $points);
  }
677
678
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array element per character (chunk length 1)
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
691
692
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") and decoded
   * with UTF8::html_entity_decode().
   *
   * @param mixed $int <p>The decimal code point.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    $flags = (Bootup::is_php('5.4') === true)
        ? (ENT_QUOTES | ENT_HTML5)
        : ENT_QUOTES;

    return self::html_entity_decode('&#' . $int . ';', $flags);
  }
709
710
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * The string is returned unchanged when it or the encoding name is empty,
   * when the current encoding cannot be detected, or when $force is off and
   * the detected encoding already equals the target encoding.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // nothing to convert: detection failed, or the string already has the
    // target encoding and no conversion is forced
    if (
        $encodingDetected === false
        ||
        (
            $force !== true
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    // the built-in converters are used when forced, or when the source is one
    // of UTF-8 / WINDOWS-1252 / ISO-8859-1
    $useOwnConverter = $force === true
                       || $encodingDetected === 'UTF-8'
                       || $encodingDetected === 'WINDOWS-1252'
                       || $encodingDetected === 'ISO-8859-1';

    if ($encoding === 'UTF-8' && $useOwnConverter) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $useOwnConverter) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // a falsy conversion result falls back to the unconverted input
    return $strEncoded ? $strEncoded : $str;
  }
801
802
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * The file name is passed through filter_var(..., FILTER_SANITIZE_STRING)
   * before it is used. When no context is given and $timeout is non-zero, a
   * stream context with the "http" => "timeout" option is created.
   *
   * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm
   * which PHP versions this file has to support.
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>Name of the file to read.</p>
   * @param int|false     $flags         [optional] <p>
   *                                     Handed to the native file_get_contents() as its second
   *                                     argument (use_include_path / FILE_USE_INCLUDE_PATH);
   *                                     any falsy value is passed on as false.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If you don't need to use a
   *                                     custom context, you can skip this parameter by null.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts; null is treated as 0.
   *                                     </p>
   * @param int|null      $maxLength     [optional] <p>
   *                                     Maximum length of data read. Only honoured when it is an int;
   *                                     otherwise the file is read until its end is reached.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The read data, or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // only build a timeout context when the caller did not bring one along
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' => array(
                  'timeout' => $timeout,
              ),
          )
      );
    }

    if (!$flags) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length argument is only passed on when it really is an integer
    $data = is_int($maxLength) === true
        ? file_get_contents($filename, $flags, $context, $offset, $maxLength)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    return self::cleanup(self::encode('UTF-8', $data, false));
  }
927
928
  /**
929
   * Checks if a file starts with BOM (Byte Order Mark) character.
930
   *
931
   * @param string $file_path <p>Path to a valid file.</p>
932
   *
933
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
934
   */
935 1
  public static function file_has_bom($file_path)
  {
    // The unqualified call is PHP's global file_get_contents(); it yields false
    // when the file cannot be read.
    $content = file_get_contents($file_path);

    // An unreadable file has no BOM to report; do not pass the boolean on
    // to the string-based check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
939
940
  /**
941
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
942
   *
943
   * @param mixed  $var
944
   * @param int    $normalization_form
945
   * @param string $leading_combining
946
   *
947
   * @return mixed
948
   */
949 9
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively, keys are kept.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property in place on the given instance.
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string is passed through unchanged.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII needs neither normalization nor re-encoding.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string was already in the requested form.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized string when one was produced, otherwise fall back to re-encoding.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1003
1004
  /**
1005
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1006
   *
1007
   * Gets a specific external variable by name and optionally filters it
1008
   *
1009
   * @link  http://php.net/manual/en/function.filter-input.php
1010
   *
1011
   * @param int    $type          <p>
1012
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1013
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1014
   *                              <b>INPUT_ENV</b>.
1015
   *                              </p>
1016
   * @param string $variable_name <p>
1017
   *                              Name of a variable to get.
1018
   *                              </p>
1019
   * @param int    $filter        [optional] <p>
1020
   *                              The ID of the filter to apply. The
1021
   *                              manual page lists the available filters.
1022
   *                              </p>
1023
   * @param mixed  $options       [optional] <p>
1024
   *                              Associative array of options or bitwise disjunction of flags. If filter
1025
   *                              accepts options, flags can be provided in "flags" field of array.
1026
   *                              </p>
1027
   *
1028
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1029
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1030
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1031
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1032
   * @since 5.2.0
1033
   */
1034 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a fourth argument,
    // so the native function keeps applying its own default otherwise.
    $raw = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // Normalize whatever the native filter produced.
    return self::filter($raw);
  }
1044
1045
  /**
1046
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1047
   *
1048
   * Gets external variables and optionally filters them
1049
   *
1050
   * @link  http://php.net/manual/en/function.filter-input-array.php
1051
   *
1052
   * @param int   $type       <p>
1053
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1054
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1055
   *                          <b>INPUT_ENV</b>.
1056
   *                          </p>
1057
   * @param mixed $definition [optional] <p>
1058
   *                          An array defining the arguments. A valid key is a string
1059
   *                          containing a variable name and a valid value is either a filter type, or an array
1060
   *                          optionally specifying the filter, flags and options. If the value is an
1061
   *                          array, valid keys are filter which specifies the
1062
   *                          filter type,
1063
   *                          flags which specifies any flags that apply to the
1064
   *                          filter, and options which specifies any options that
1065
   *                          apply to the filter. See the example below for a better understanding.
1066
   *                          </p>
1067
   *                          <p>
1068
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1069
   *                          input array are filtered by this filter.
1070
   *                          </p>
1071
   * @param bool  $add_empty  [optional] <p>
1072
   *                          Add missing keys as <b>NULL</b> to the return value.
1073
   *                          </p>
1074
   *
1075
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1076
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1077
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1078
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1079
   * fails.
1080
   * @since 5.2.0
1081
   */
1082 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // Called with the input type only: let the native function use its own defaults.
    if (func_num_args() < 2) {
      return self::filter(filter_input_array($type));
    }

    // Otherwise forward the definition and the "add empty" switch as given.
    $values = filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1092
1093
  /**
1094
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1095
   *
1096
   * Filters a variable with a specified filter
1097
   *
1098
   * @link  http://php.net/manual/en/function.filter-var.php
1099
   *
1100
   * @param mixed $variable <p>
1101
   *                        Value to filter.
1102
   *                        </p>
1103
   * @param int   $filter   [optional] <p>
1104
   *                        The ID of the filter to apply. The
1105
   *                        manual page lists the available filters.
1106
   *                        </p>
1107
   * @param mixed $options  [optional] <p>
1108
   *                        Associative array of options or bitwise disjunction of flags. If filter
1109
   *                        accepts options, flags can be provided in "flags" field of array. For
1110
   *                        the "callback" filter, callable type should be passed. The
1111
   *                        callback must accept one argument, the value to be filtered, and return
1112
   *                        the value after filtering/sanitizing it.
1113
   *                        </p>
1114
   *                        <p>
1115
   *                        <code>
1116
   *                        // for filters that accept options, use this format
1117
   *                        $options = array(
1118
   *                        'options' => array(
1119
   *                        'default' => 3, // value to return if the filter fails
1120
   *                        // other options here
1121
   *                        'min_range' => 0
1122
   *                        ),
1123
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1124
   *                        );
1125
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1126
   *                        // for filter that only accept flags, you can pass them directly
1127
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1128
   *                        // for filter that only accept flags, you can also pass as an array
1129
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1130
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1131
   *                        // callback validate filter
1132
   *                        function foo($value)
1133
   *                        {
1134
   *                        // Expected format: Surname, GivenNames
1135
   *                        if (strpos($value, ", ") === false) return false;
1136
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1137
   *                        $empty = (empty($surname) || empty($givennames));
1138
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1139
   *                        if ($empty || $notstrings) {
1140
   *                        return false;
1141
   *                        } else {
1142
   *                        return $value;
1143
   *                        }
1144
   *                        }
1145
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1146
   *                        </code>
1147
   *                        </p>
1148
   *
1149
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1150
   * @since 5.2.0
1151
   */
1152 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when a third argument was actually supplied.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Normalize the result of the native filter.
    return self::filter($filtered);
  }
1162
1163
  /**
1164
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1165
   *
1166
   * Gets multiple variables and optionally filters them
1167
   *
1168
   * @link  http://php.net/manual/en/function.filter-var-array.php
1169
   *
1170
   * @param array $data       <p>
1171
   *                          An array with string keys containing the data to filter.
1172
   *                          </p>
1173
   * @param mixed $definition [optional] <p>
1174
   *                          An array defining the arguments. A valid key is a string
1175
   *                          containing a variable name and a valid value is either a
1176
   *                          filter type, or an
1177
   *                          array optionally specifying the filter, flags and options.
1178
   *                          If the value is an array, valid keys are filter
1179
   *                          which specifies the filter type,
1180
   *                          flags which specifies any flags that apply to the
1181
   *                          filter, and options which specifies any options that
1182
   *                          apply to the filter. See the example below for a better understanding.
1183
   *                          </p>
1184
   *                          <p>
1185
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1186
   *                          input array are filtered by this filter.
1187
   *                          </p>
1188
   * @param bool  $add_empty  [optional] <p>
1189
   *                          Add missing keys as <b>NULL</b> to the return value.
1190
   *                          </p>
1191
   *
1192
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1193
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1194
   * the variable is not set.
1195
   * @since 5.2.0
1196
   */
1197 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Called with the data only: let the native function use its own defaults.
    if (func_num_args() < 2) {
      return self::filter(filter_var_array($data));
    }

    // Otherwise forward the definition and the "add empty" switch as given.
    $values = filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1207
1208
  /**
1209
   * Check if the number of unicode characters is not more than the specified integer.
1210
   *
1211
   * @param string $str      The original string to be checked.
1212
   * @param int    $box_size The size in number of chars to be checked against string.
1213
   *
1214
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1215
   */
1216 1
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by the class' own strlen() implementation.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1220
1221
  /**
1222
   * Try to fix simple broken UTF-8 strings.
1223
   *
1224
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1225
   *
1226
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1227
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1228
   * See: http://en.wikipedia.org/wiki/Windows-1252
1229
   *
1230
   * @param string $str <p>The input string</p>
1231
   *
1232
   * @return string
1233
   */
1234 26 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Search/replace lists are built once per request and reused afterwards.
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      // getData() returns false when the data file is missing. Without this guard
      // array_keys()/array_values() would be called on a boolean (a warning on old
      // PHP versions, a TypeError on PHP 8). With no mapping, the string is returned unchanged.
      if (is_array(self::$BROKEN_UTF8_FIX) === false) {
        return $str;
      }

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = array_keys(self::$BROKEN_UTF8_FIX);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = array_values(self::$BROKEN_UTF8_FIX);
    }

    // Replace every known broken byte sequence with its proper UTF-8 counterpart.
    return str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1258
1259
  /**
1260
   * Fix a double (or multiple) encoded UTF8 string.
1261
   *
1262
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1263
   *
1264
   * @return string|string[] <p>Will return the fixed input-"array" or
1265
   *                         the fixed input-"string".</p>
1266
   */
1267 1
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element; keys and order are kept.
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Repeat the decode/encode round trip until it no longer changes the value.
    // An empty string never enters the loop and is returned as it is.
    for ($previous = ''; $previous !== $str;) {
      $previous = $str;
      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1291
1292
  /**
1293
   * Get the text direction of a specific character.
1294
   *
1295
   * @param string $char
1296
   *
1297
   * @return string <p>'RTL' or 'LTR'</p>
1298
   */
1299 1
  public static function getCharDirection($char)
  {
    // Code points at or below 0x85e: exact matches and inclusive [from, to] ranges treated as RTL.
    static $LOW_SINGLES = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6, 0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa, 0x81a, 0x824, 0x828, 0x85e,
    );
    static $LOW_RANGES = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
    );

    // Code points at or above 0xfb1d: exact matches and inclusive [from, to] ranges treated as RTL.
    static $HIGH_SINGLES = array(
        0xfb1d, 0xfb3e, 0x10808, 0x1083c, 0x1093f, 0x10a00,
    );
    static $HIGH_RANGES = array(
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: ask IntlChar when it is available. Its numeric result is matched
    // against the two code lists below; any other value falls through to the table lookup.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of [0x5be, 0x10b7f] is left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Select the lookup table that belongs to the code point's region.
    if (0x85e >= $c) {
      $singles = $LOW_SINGLES;
      $ranges = $LOW_RANGES;
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $singles = $HIGH_SINGLES;
      $ranges = $HIGH_RANGES;
    } else {
      return 'LTR';
    }

    // Exact code points are matched strictly ...
    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    // ... ranges are matched inclusively on both ends.
    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1413
1414
  /**
1415
   * get data from "/data/*.php"
1416
   *
1417
   * @param string $file
1418
   *
1419
   * @return bool|string|array|int <p>Will return false on error.</p>
1420
   */
1421 6
  private static function getData($file)
  {
    // Data files live in the "data" directory next to this class file.
    $path = __DIR__ . '/data/' . $file . '.php';

    // A missing file is reported as false.
    if (!file_exists($path)) {
      return false;
    }

    // The included file's return value is handed back as the result.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1431
1432
  /**
1433
   * Check for php-support.
1434
   *
1435
   * @param string|null $key
1436
   *
1437
   * @return mixed <p>Return the full support-"array", if $key === null<br>
1438
   *               return bool-value, if $key is used and available<br>
1439
   *               otherwise return null</p>
1440
   */
1441 19
  public static function getSupportInfo($key = null)
  {
    // Make sure the support table has been populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key given: return the complete support table.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Known key: return its value; unknown key (or null value): return null.
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
1457
1458
  /**
1459
   * alias for "UTF8::string_has_bom()"
1460
   *
1461
   * @see        UTF8::string_has_bom()
1462
   *
1463
   * @param string $str
1464
   *
1465
   * @return bool
1466
   *
1467
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
1468
   */
1469
  public static function hasBom($str)
  {
    // Deprecated alias: the check itself is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1473
1474
  /**
1475
   * Converts a hexadecimal-value into an UTF-8 character.
1476
   *
1477
   * @param string $hexdec <p>The hexadecimal value.</p>
1478
   *
1479
   * @return string|false <p>One single UTF-8 character.</p>
1480
   */
1481 2
  public static function hex_to_chr($hexdec)
  {
    // Turn the hexadecimal notation into a number first ...
    $decimal = hexdec($hexdec);

    // ... then let decimal_to_chr() build the character for it.
    return self::decimal_to_chr($decimal);
  }
1485
1486
  /**
1487
   * Converts hexadecimal U+xxxx code point representation to integer.
1488
   *
1489
   * INFO: opposite to UTF8::int_to_hex()
1490
   *
1491
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
1492
   *
1493
   * @return int|false <p>The code point, or false on failure.</p>
1494
   */
1495 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // An empty input is not a code point.
    if (!isset($hexDec[0])) {
      return false;
    }

    // Accepted notations: "\uXXXX", "U+XXXX" or the bare digits (4 to 6 of them).
    // Only real hexadecimal digits are allowed: the former class [a-z0-9] also matched
    // letters such as "z", for which intval(..., 16) silently yields 0 instead of a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1509
1510
  /**
1511
   * alias for "UTF8::html_entity_decode()"
1512
   *
1513
   * @see UTF8::html_entity_decode()
1514
   *
1515
   * @param string $str
1516
   * @param int    $flags
1517
   * @param string $encoding
1518
   *
1519
   * @return string
1520
   */
1521 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Alias: all arguments are forwarded unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1525
1526
  /**
1527
   * Converts a UTF-8 string to a series of HTML numbered entities.
1528
   *
1529
   * INFO: opposite to UTF8::html_decode()
1530
   *
1531
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1532
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1533
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1534
   *
1535
   * @return string <p>HTML numbered entities.</p>
1536
   */
1537 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the map starts at 0x80, so 7-bit characters stay as they are.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map is a flat list of 4-tuples: start, end, offset, mask.
      // It used to carry a stray fifth element, which makes the map length invalid
      // (PHP 8 rejects maps whose size is not a multiple of four).
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1575
1576
  /**
1577
   * UTF-8 version of html_entity_decode()
1578
   *
1579
   * The reason we are not using html_entity_decode() by itself is because
1580
   * while it is not technically correct to leave out the semicolon
1581
   * at the end of an entity most browsers will still interpret the entity
1582
   * correctly. html_entity_decode() does not convert entities without
1583
   * semicolons, so we are left with our own little solution here. Bummer.
1584
   *
1585
   * Convert all HTML entities to their applicable characters
1586
   *
1587
   * INFO: opposite to UTF8::html_encode()
1588
   *
1589
   * @link http://php.net/manual/en/function.html-entity-decode.php
1590
   *
1591
   * @param string $str      <p>
1592
   *                         The input string.
1593
   *                         </p>
1594
   * @param int    $flags    [optional] <p>
1595
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1596
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1597
   *                         <table>
1598
   *                         Available <i>flags</i> constants
1599
   *                         <tr valign="top">
1600
   *                         <td>Constant Name</td>
1601
   *                         <td>Description</td>
1602
   *                         </tr>
1603
   *                         <tr valign="top">
1604
   *                         <td><b>ENT_COMPAT</b></td>
1605
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1606
   *                         </tr>
1607
   *                         <tr valign="top">
1608
   *                         <td><b>ENT_QUOTES</b></td>
1609
   *                         <td>Will convert both double and single quotes.</td>
1610
   *                         </tr>
1611
   *                         <tr valign="top">
1612
   *                         <td><b>ENT_NOQUOTES</b></td>
1613
   *                         <td>Will leave both double and single quotes unconverted.</td>
1614
   *                         </tr>
1615
   *                         <tr valign="top">
1616
   *                         <td><b>ENT_HTML401</b></td>
1617
   *                         <td>
1618
   *                         Handle code as HTML 4.01.
1619
   *                         </td>
1620
   *                         </tr>
1621
   *                         <tr valign="top">
1622
   *                         <td><b>ENT_XML1</b></td>
1623
   *                         <td>
1624
   *                         Handle code as XML 1.
1625
   *                         </td>
1626
   *                         </tr>
1627
   *                         <tr valign="top">
1628
   *                         <td><b>ENT_XHTML</b></td>
1629
   *                         <td>
1630
   *                         Handle code as XHTML.
1631
   *                         </td>
1632
   *                         </tr>
1633
   *                         <tr valign="top">
1634
   *                         <td><b>ENT_HTML5</b></td>
1635
   *                         <td>
1636
   *                         Handle code as HTML 5.
1637
   *                         </td>
1638
   *                         </tr>
1639
   *                         </table>
1640
   *                         </p>
1641
   * @param string $encoding [optional] <p>Encoding to use.</p>
1642
   *
1643
   * @return string <p>The decoded string.</p>
1644
   */
1645 16
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // An empty input has nothing to decode.
    if ($str === '') {
      return '';
    }

    // Inputs shorter than four bytes are returned untouched (examples: &; || &x;).
    if (!isset($str[3])) {
      return $str;
    }

    // Cheap pre-check: without any "&" there is nothing to do ...
    if (strpos($str, '&') === false) {
      return $str;
    }

    // ... and the same holds when neither "&#" nor ";" occurs in the string.
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      // ENT_HTML5 is only referenced when Bootup::is_php('5.4') is true.
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    $isExoticEncoding = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($isExoticEncoding && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Decodes one decimal entity via mbstring. Quote characters are handed back
    // still encoded, so the native html_entity_decode() call below decides about
    // them according to $flags.
    $decodeDecimalEntity = function ($match) use ($encoding) {
      $decoded = \mb_convert_encoding($match[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $match[0];
      }

      return $decoded;
    };

    // Repeat until a pass no longer changes the string (handles nested encodings).
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // Append the missing ";" to unterminated numeric entities, then decode
      // numeric & UTF16 two byte entities.
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
1720
1721
  /**
1722
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1723
   *
1724
   * @link http://php.net/manual/en/function.htmlentities.php
1725
   *
1726
   * @param string $str           <p>
1727
   *                              The input string.
1728
   *                              </p>
1729
   * @param int    $flags         [optional] <p>
1730
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1731
   *                              invalid code unit sequences and the used document type. The default is
1732
   *                              ENT_COMPAT | ENT_HTML401.
1733
   *                              <table>
1734
   *                              Available <i>flags</i> constants
1735
   *                              <tr valign="top">
1736
   *                              <td>Constant Name</td>
1737
   *                              <td>Description</td>
1738
   *                              </tr>
1739
   *                              <tr valign="top">
1740
   *                              <td><b>ENT_COMPAT</b></td>
1741
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1742
   *                              </tr>
1743
   *                              <tr valign="top">
1744
   *                              <td><b>ENT_QUOTES</b></td>
1745
   *                              <td>Will convert both double and single quotes.</td>
1746
   *                              </tr>
1747
   *                              <tr valign="top">
1748
   *                              <td><b>ENT_NOQUOTES</b></td>
1749
   *                              <td>Will leave both double and single quotes unconverted.</td>
1750
   *                              </tr>
1751
   *                              <tr valign="top">
1752
   *                              <td><b>ENT_IGNORE</b></td>
1753
   *                              <td>
1754
   *                              Silently discard invalid code unit sequences instead of returning
1755
   *                              an empty string. Using this flag is discouraged as it
1756
   *                              may have security implications.
1757
   *                              </td>
1758
   *                              </tr>
1759
   *                              <tr valign="top">
1760
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1761
   *                              <td>
1762
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1763
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1764
   *                              </td>
1765
   *                              </tr>
1766
   *                              <tr valign="top">
1767
   *                              <td><b>ENT_DISALLOWED</b></td>
1768
   *                              <td>
1769
   *                              Replace invalid code points for the given document type with a
1770
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1771
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1772
   *                              instance, to ensure the well-formedness of XML documents with
1773
   *                              embedded external content.
1774
   *                              </td>
1775
   *                              </tr>
1776
   *                              <tr valign="top">
1777
   *                              <td><b>ENT_HTML401</b></td>
1778
   *                              <td>
1779
   *                              Handle code as HTML 4.01.
1780
   *                              </td>
1781
   *                              </tr>
1782
   *                              <tr valign="top">
1783
   *                              <td><b>ENT_XML1</b></td>
1784
   *                              <td>
1785
   *                              Handle code as XML 1.
1786
   *                              </td>
1787
   *                              </tr>
1788
   *                              <tr valign="top">
1789
   *                              <td><b>ENT_XHTML</b></td>
1790
   *                              <td>
1791
   *                              Handle code as XHTML.
1792
   *                              </td>
1793
   *                              </tr>
1794
   *                              <tr valign="top">
1795
   *                              <td><b>ENT_HTML5</b></td>
1796
   *                              <td>
1797
   *                              Handle code as HTML 5.
1798
   *                              </td>
1799
   *                              </tr>
1800
   *                              </table>
1801
   *                              </p>
1802
   * @param string $encoding      [optional] <p>
1803
   *                              Like <b>htmlspecialchars</b>,
1804
   *                              <b>htmlentities</b> takes an optional third argument
1805
   *                              <i>encoding</i> which defines encoding used in
1806
   *                              conversion.
1807
   *                              Although this argument is technically optional, you are highly
1808
   *                              encouraged to specify the correct value for your code.
1809
   *                              </p>
1810
   * @param bool   $double_encode [optional] <p>
1811
   *                              When <i>double_encode</i> is turned off PHP will not
1812
   *                              encode existing html entities. The default is to convert everything.
1813
   *                              </p>
1814
   *
1815
   *
1816
   * @return string the encoded string.
1817
   * </p>
1818
   * <p>
1819
   * If the input <i>string</i> contains an invalid code unit
1820
   * sequence within the given <i>encoding</i> an empty string
1821
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1822
   * <b>ENT_SUBSTITUTE</b> flags are set.
1823
   */
1824 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // PHP's htmlentities() leaves backslashes alone; they are turned into their
    // numeric entity here as well.
    //
    // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra multi-byte pass below only applies to UTF-8 (checked again,
    // because normalisation may have changed $encoding).
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $position => $size) {
      // Only characters of three or more bytes get an extra entity.
      if ($size < 3) {
        continue;
      }

      $character = self::access($encoded, $position);
      if (isset($entities[$character])) {
        continue; // already mapped
      }

      $needles[$character] = $character;
      $entities[$character] = self::html_encode($character);
    }

    return str_replace($needles, $entities, $encoded);
  }
1862
1863
  /**
1864
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1865
   *
1866
   * INFO: Take a look at "UTF8::htmlentities()"
1867
   *
1868
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1869
   *
1870
   * @param string $str           <p>
1871
   *                              The string being converted.
1872
   *                              </p>
1873
   * @param int    $flags         [optional] <p>
1874
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1875
   *                              invalid code unit sequences and the used document type. The default is
1876
   *                              ENT_COMPAT | ENT_HTML401.
1877
   *                              <table>
1878
   *                              Available <i>flags</i> constants
1879
   *                              <tr valign="top">
1880
   *                              <td>Constant Name</td>
1881
   *                              <td>Description</td>
1882
   *                              </tr>
1883
   *                              <tr valign="top">
1884
   *                              <td><b>ENT_COMPAT</b></td>
1885
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1886
   *                              </tr>
1887
   *                              <tr valign="top">
1888
   *                              <td><b>ENT_QUOTES</b></td>
1889
   *                              <td>Will convert both double and single quotes.</td>
1890
   *                              </tr>
1891
   *                              <tr valign="top">
1892
   *                              <td><b>ENT_NOQUOTES</b></td>
1893
   *                              <td>Will leave both double and single quotes unconverted.</td>
1894
   *                              </tr>
1895
   *                              <tr valign="top">
1896
   *                              <td><b>ENT_IGNORE</b></td>
1897
   *                              <td>
1898
   *                              Silently discard invalid code unit sequences instead of returning
1899
   *                              an empty string. Using this flag is discouraged as it
1900
   *                              may have security implications.
1901
   *                              </td>
1902
   *                              </tr>
1903
   *                              <tr valign="top">
1904
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1905
   *                              <td>
1906
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1907
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1908
   *                              </td>
1909
   *                              </tr>
1910
   *                              <tr valign="top">
1911
   *                              <td><b>ENT_DISALLOWED</b></td>
1912
   *                              <td>
1913
   *                              Replace invalid code points for the given document type with a
1914
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1915
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1916
   *                              instance, to ensure the well-formedness of XML documents with
1917
   *                              embedded external content.
1918
   *                              </td>
1919
   *                              </tr>
1920
   *                              <tr valign="top">
1921
   *                              <td><b>ENT_HTML401</b></td>
1922
   *                              <td>
1923
   *                              Handle code as HTML 4.01.
1924
   *                              </td>
1925
   *                              </tr>
1926
   *                              <tr valign="top">
1927
   *                              <td><b>ENT_XML1</b></td>
1928
   *                              <td>
1929
   *                              Handle code as XML 1.
1930
   *                              </td>
1931
   *                              </tr>
1932
   *                              <tr valign="top">
1933
   *                              <td><b>ENT_XHTML</b></td>
1934
   *                              <td>
1935
   *                              Handle code as XHTML.
1936
   *                              </td>
1937
   *                              </tr>
1938
   *                              <tr valign="top">
1939
   *                              <td><b>ENT_HTML5</b></td>
1940
   *                              <td>
1941
   *                              Handle code as HTML 5.
1942
   *                              </td>
1943
   *                              </tr>
1944
   *                              </table>
1945
   *                              </p>
1946
   * @param string $encoding      [optional] <p>
1947
   *                              Defines encoding used in conversion.
1948
   *                              </p>
1949
   *                              <p>
1950
   *                              For the purposes of this function, the encodings
1951
   *                              ISO-8859-1, ISO-8859-15,
1952
   *                              UTF-8, cp866,
1953
   *                              cp1251, cp1252, and
1954
   *                              KOI8-R are effectively equivalent, provided the
1955
   *                              <i>string</i> itself is valid for the encoding, as
1956
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1957
   *                              the same positions in all of these encodings.
1958
   *                              </p>
1959
   * @param bool   $double_encode [optional] <p>
1960
   *                              When <i>double_encode</i> is turned off PHP will not
1961
   *                              encode existing html entities, the default is to convert everything.
1962
   *                              </p>
1963
   *
1964
   * @return string The converted string.
1965
   * </p>
1966
   * <p>
1967
   * If the input <i>string</i> contains an invalid code unit
1968
   * sequence within the given <i>encoding</i> an empty string
1969
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1970
   * <b>ENT_SUBSTITUTE</b> flags are set.
1971
   */
1972 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Anything other than the literal 'UTF-8' is normalised first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
1980
1981
  /**
1982
   * Checks whether iconv is available on the server.
1983
   *
1984
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1985
   */
1986 1
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only touch the iconv settings when the extension really exists; otherwise
    // iconv_set_encoding() is an undefined function and calling it is fatal.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2001
2002
  /**
2003
   * alias for "UTF8::decimal_to_chr()"
2004
   *
2005
   * @see UTF8::decimal_to_chr()
2006
   *
2007
   * @param mixed $int
2008
   *
2009
   * @return string
2010
   */
2011 2
  public static function int_to_chr($int)
  {
    // Pure alias: delegate to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2015
2016
  /**
2017
   * Converts Integer to hexadecimal U+xxxx code point representation.
2018
   *
2019
   * INFO: opposite to UTF8::hex_to_int()
2020
   *
2021
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2022
   * @param string $pfix [optional]
2023
   *
2024
   * @return string <p>The code point, or empty string on failure.</p>
2025
   */
2026 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers are converted; everything else yields ''.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits (e.g. 65 => "0041").
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2038
2039
  /**
2040
   * Checks whether intl-char is available on the server.
2041
   *
2042
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2043
   */
2044 1
  public static function intlChar_loaded()
  {
    // The version check comes first, so class_exists() is only consulted
    // when Bootup::is_php('7.0') is true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2052
2053
  /**
2054
   * Checks whether intl is available on the server.
2055
   *
2056
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2057
   */
2058 4
  public static function intl_loaded()
  {
    // extension_loaded() already answers with a boolean.
    return extension_loaded('intl') === true;
  }
2062
2063
  /**
2064
   * alias for "UTF8::is_ascii()"
2065
   *
2066
   * @see        UTF8::is_ascii()
2067
   *
2068
   * @param string $str
2069
   *
2070
   * @return boolean
2071
   *
2072
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2073
   */
2074
  public static function isAscii($str)
  {
    // Deprecated alias: forwards to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2078
2079
  /**
2080
   * alias for "UTF8::is_base64()"
2081
   *
2082
   * @see        UTF8::is_base64()
2083
   *
2084
   * @param string $str
2085
   *
2086
   * @return bool
2087
   *
2088
   * @deprecated <p>use "UTF8::is_base64()"</p>
2089
   */
2090
  public static function isBase64($str)
  {
    // Deprecated alias: forwards to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2094
2095
  /**
2096
   * alias for "UTF8::is_binary()"
2097
   *
2098
   * @see        UTF8::is_binary()
2099
   *
2100
   * @param string $str
2101
   *
2102
   * @return bool
2103
   *
2104
   * @deprecated <p>use "UTF8::is_binary()"</p>
2105
   */
2106
  public static function isBinary($str)
  {
    // Deprecated alias: forwards to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2110
2111
  /**
2112
   * alias for "UTF8::is_bom()"
2113
   *
2114
   * @see        UTF8::is_bom()
2115
   *
2116
   * @param string $utf8_chr
2117
   *
2118
   * @return boolean
2119
   *
2120
   * @deprecated <p>use "UTF8::is_bom()"</p>
2121
   */
2122
  public static function isBom($utf8_chr)
  {
    // Deprecated alias: forwards to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2126
2127
  /**
2128
   * alias for "UTF8::is_html()"
2129
   *
2130
   * @see        UTF8::is_html()
2131
   *
2132
   * @param string $str
2133
   *
2134
   * @return boolean
2135
   *
2136
   * @deprecated <p>use "UTF8::is_html()"</p>
2137
   */
2138
  public static function isHtml($str)
  {
    // Deprecated alias: forwards to is_html().
    $result = self::is_html($str);

    return $result;
  }
2142
2143
  /**
2144
   * alias for "UTF8::is_json()"
2145
   *
2146
   * @see        UTF8::is_json()
2147
   *
2148
   * @param string $str
2149
   *
2150
   * @return bool
2151
   *
2152
   * @deprecated <p>use "UTF8::is_json()"</p>
2153
   */
2154
  public static function isJson($str)
  {
    // Deprecated alias: forwards to is_json().
    $result = self::is_json($str);

    return $result;
  }
2158
2159
  /**
2160
   * alias for "UTF8::is_utf16()"
2161
   *
2162
   * @see        UTF8::is_utf16()
2163
   *
2164
   * @param string $str
2165
   *
2166
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2167
   *
2168
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2169
   */
2170
  public static function isUtf16($str)
  {
    // Deprecated alias: forwards to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2174
2175
  /**
2176
   * alias for "UTF8::is_utf32()"
2177
   *
2178
   * @see        UTF8::is_utf32()
2179
   *
2180
   * @param string $str
2181
   *
2182
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2183
   *
2184
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2185
   */
2186
  public static function isUtf32($str)
  {
    // Deprecated alias: forwards to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2190
2191
  /**
2192
   * alias for "UTF8::is_utf8()"
2193
   *
2194
   * @see        UTF8::is_utf8()
2195
   *
2196
   * @param string $str
2197
   * @param bool   $strict
2198
   *
2199
   * @return bool
2200
   *
2201
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2202
   */
2203
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated alias: forwards both arguments to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2207
2208
  /**
2209
   * Checks if a string is 7 bit ASCII.
2210
   *
2211
   * @param string $str <p>The string to check.</p>
2212
   *
2213
   * @return bool <p>
2214
   *              <strong>true</strong> if it is ASCII<br>
2215
   *              <strong>false</strong> otherwise
2216
   *              </p>
2217
   */
2218 55
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // ASCII means: no byte outside the whitelist (printable range plus a few
    // control bytes).
    // NOTE(review): "\x10" and "\x13" look like decimal 10/13 written as hex
    // (LF/CR are already listed as \x0A/\x0D) - confirm before changing.
    return preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
  }
2228
2229
  /**
2230
   * Returns true if the string is base64 encoded, false otherwise.
2231
   *
2232
   * @param string $str <p>The input string.</p>
2233
   *
2234
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2235
   */
2236 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    // Strict mode: base64_decode() returns false when the input contains
    // characters outside the base64 alphabet.
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare against the canonical re-encoding instead of testing the decoded
    // payload for truthiness: a payload of "0" (base64 "MA==") is falsy in PHP
    // and was wrongly rejected before.
    return base64_encode($decoded) === $str;
  }
2251
2252
  /**
2253
   * Check if the input is binary (heuristic; this looks like a hack).
2254
   *
2255
   * @param mixed $input
2256
   *
2257
   * @return bool
2258
   */
2259 16
  public static function is_binary($input)
  {
    $input = (string)$input;

    if (!isset($input[0])) {
      return false;
    }

    // A string made up solely of the characters "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. (The former "more than 30% NUL
    // bytes" heuristic could never change the outcome, because a single NUL
    // byte already produced true, so it was removed.)
    return substr_count($input, "\x00") > 0;
  }
2282
2283
  /**
2284
   * Check if the file is binary.
2285
   *
2286
   * @param string $file
2287
   *
2288
   * @return boolean
2289
   */
2290 1
  public static function is_binary_file($file)
  {
    // Default for "could not read": an empty block, which is_binary() rejects.
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false (plus a warning), not by
      // throwing; passing that false on to fread()/fclose() is an error.
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $block = fread($fp, 512);
        fclose($fp);
      }
    } catch (\Exception $e) {
      // Reached when an error handler turns the warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block);
  }
2303
  /**
2304
   * Checks if the given string is equal to any "Byte Order Mark".
2305
   *
2306
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2307
   *
2308
   * @param string $str <p>The input string.</p>
2309
   *
2310
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2311
   */
2312 1
  public static function is_bom($str)
  {
    // The known BOM byte sequences are the keys of self::$BOM; the input must
    // be strictly identical to one of them.
    return in_array($str, array_keys(self::$BOM), true);
  }
2322
2323
  /**
2324
   * Check if the string contains any html-tags <lall>.
2325
   *
2326
   * @param string $str <p>The input string.</p>
2327
   *
2328
   * @return boolean
2329
   */
2330 1
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // One match of an opening, closing or self-closing tag (optionally with
    // attributes) is enough.
    return preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str) === 1;
  }
2349
2350
  /**
2351
   * Try to check if "$str" is an json-string.
2352
   *
2353
   * @param string $str <p>The input string.</p>
2354
   *
2355
   * @return bool
2356
   */
2357 1
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalars and null do not.
    if (is_object($decoded) === false && is_array($decoded) === false) {
      return false;
    }

    // ... and the decoder must not have reported an error.
    return json_last_error() === JSON_ERROR_NONE;
  }
2381
2382
  /**
2383
   * Check if the string is UTF-16.
2384
   *
2385
   * @param string $str <p>The input string.</p>
2386
   *
2387
   * @return int|false <p>
2388
   *                   <strong>false</strong> if it's not UTF-16,<br>
2389
   *                   <strong>1</strong> for UTF-16LE,<br>
2390
   *                   <strong>2</strong> for UTF-16BE.
2391
   *                   </p>
2392
   */
2393 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is examined at all.
    if (self::is_binary($str) !== true) {
      return false;
    }

    $scoreLE = self::utf16_round_trip_score($str, 'UTF-16LE');
    $scoreBE = self::utf16_round_trip_score($str, 'UTF-16BE');

    // A tie (including 0:0) means "not UTF-16".
    if ($scoreLE === $scoreBE) {
      return false;
    }

    return ($scoreLE > $scoreBE) ? 1 : 2;
  }

  /**
   * Score how plausible it is that $str is encoded in the given UTF-16 flavour.
   *
   * The string is converted to UTF-8, back to $encoding and to UTF-8 again; when
   * that round trip is stable, every key of count_chars() of the result that is
   * found (strictly) in count_chars() of the input adds one point.
   *
   * @param string $str      <p>The input string (BOM already removed).</p>
   * @param string $encoding <p>'UTF-16LE' or 'UTF-16BE'.</p>
   *
   * @return int
   */
  private static function utf16_round_trip_score($str, $encoding)
  {
    $score = 0;

    $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$asUtf8) {
      return $score;
    }

    $reEncoded = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($roundTrip !== $asUtf8) {
      return $score;
    }

    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
      if (in_array($roundTripChar, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
2441
2442
  /**
2443
   * Check if the string is UTF-32.
2444
   *
2445
   * @param string $str
2446
   *
2447
   * @return int|false <p>
2448
   *                   <strong>false</strong> if it's not UTF-32,<br>
2449
   *                   <strong>1</strong> for UTF-32LE,<br>
2450
   *                   <strong>2</strong> for UTF-32BE.
2451
   *                   </p>
2452
   */
2453 3 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() accepts is examined at all.
    if (self::is_binary($str) !== true) {
      return false;
    }

    $scoreLE = self::utf32_round_trip_score($str, 'UTF-32LE');
    $scoreBE = self::utf32_round_trip_score($str, 'UTF-32BE');

    // A tie (including 0:0) means "not UTF-32".
    if ($scoreLE === $scoreBE) {
      return false;
    }

    return ($scoreLE > $scoreBE) ? 1 : 2;
  }

  /**
   * Score how plausible it is that $str is encoded in the given UTF-32 flavour.
   *
   * The string is converted to UTF-8, back to $encoding and to UTF-8 again; when
   * that round trip is stable, every key of count_chars() of the result that is
   * found (strictly) in count_chars() of the input adds one point.
   *
   * @param string $str      <p>The input string (BOM already removed).</p>
   * @param string $encoding <p>'UTF-32LE' or 'UTF-32BE'.</p>
   *
   * @return int
   */
  private static function utf32_round_trip_score($str, $encoding)
  {
    $score = 0;

    $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$asUtf8) {
      return $score;
    }

    $reEncoded = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($roundTrip !== $asUtf8) {
      return $score;
    }

    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
      if (in_array($roundTripChar, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
2501
2502
  /**
2503
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2504
   *
2505
   * @see    http://hsivonen.iki.fi/php-utf8/
2506
   *
2507
   * @param string $str    <p>The string to be checked.</p>
2508
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2509
   *
2510
   * @return bool
2511
   */
2512 60
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: anything detected as UTF-16 / UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE: a former shortcut ran a /u regex here, but only when
    // pcre_utf8_support() reported that the modifier is NOT available - a
    // pattern that cannot match in that situation. The byte-level validator
    // below needs no PCRE support, so it is now used unconditionally.

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // The loop below walks bytes, so the *byte* length is required even when
    // mbstring overloads strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = \strlen($str);
    }

    // Lookup table indexed by single-byte string; its values are used as the
    // byte's integer value below (presumably chr => ord, see getData('ord')).
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $mState = 0; // continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];

      if ($mState === 0) {
        // Start of a character: US-ASCII or the lead octet of a sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // Lead octet of a 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // Lead octet of a 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // Lead octet of a 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // Lead octet of a 5 octet sequence. Always illegal (not the
          // shortest form, or beyond 0x10FFFF); instead of resynchronising,
          // the sequence is consumed and rejected by the checks below.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // Lead octet of a 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal lead octet.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the 6 payload bits at the position given by the remaining count.
      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 !== --$mState) {
        continue;
      }

      // Sequence complete: $mUcs4 holds the decoded code point.
      if (
          // From Unicode 3.1, non-shortest form is illegal.
          (2 === $mBytes && $mUcs4 < 0x0080) ||
          (3 === $mBytes && $mUcs4 < 0x0800) ||
          (4 === $mBytes && $mUcs4 < 0x10000) ||
          (4 < $mBytes) ||
          // From Unicode 3.2, surrogate characters are illegal.
          (($mUcs4 & 0xFFFFF800) === 0xD800) ||
          // Code points outside the Unicode range are illegal.
          ($mUcs4 > 0x10FFFF)
      ) {
        return false;
      }

      // Reset for the next character.
      $mState = 0;
      $mUcs4 = 0;
      $mBytes = 1;
    }

    // A non-zero state here means the input ended in the middle of a
    // multi-octet sequence, which is not valid UTF-8.
    return $mState === 0;
  }
2657
2658
  /**
2659
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2660
   * Decodes a JSON string
2661
   *
2662
   * @link http://php.net/manual/en/function.json-decode.php
2663
   *
2664
   * @param string $json    <p>
2665
   *                        The <i>json</i> string being decoded.
2666
   *                        </p>
2667
   *                        <p>
2668
   *                        This function only works with UTF-8 encoded strings.
2669
   *                        </p>
2670
   *                        <p>PHP implements a superset of
2671
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2672
   *                        only supports these values when they are nested inside an array or an object.
2673
   *                        </p>
2674
   * @param bool   $assoc   [optional] <p>
2675
   *                        When <b>TRUE</b>, returned objects will be converted into
2676
   *                        associative arrays.
2677
   *                        </p>
2678
   * @param int    $depth   [optional] <p>
2679
   *                        User specified recursion depth.
2680
   *                        </p>
2681
   * @param int    $options [optional] <p>
2682
   *                        Bitmask of JSON decode options. Currently only
2683
   *                        <b>JSON_BIGINT_AS_STRING</b>
2684
   *                        is supported (default is to cast large integers as floats)
2685
   *                        </p>
2686
   *
2687
   * @return mixed the value encoded in <i>json</i> in appropriate
2688
   * PHP type. Values true, false and
2689
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
2690
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
2691
   * <i>json</i> cannot be decoded or if the encoded
2692
   * data is deeper than the recursion limit.
2693
   */
2694 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Run the input through filter() and force a string before decoding.
    $filtered = (string)self::filter($json);

    // The native $options argument is only passed along on PHP >= 5.4.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
2706
2707
  /**
2708
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2709
   * Returns the JSON representation of a value.
2710
   *
2711
   * @link http://php.net/manual/en/function.json-encode.php
2712
   *
2713
   * @param mixed $value   <p>
2714
   *                       The <i>value</i> being encoded. Can be any type except
2715
   *                       a resource.
2716
   *                       </p>
2717
   *                       <p>
2718
   *                       All string data must be UTF-8 encoded.
2719
   *                       </p>
2720
   *                       <p>PHP implements a superset of
2721
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2722
   *                       only supports these values when they are nested inside an array or an object.
2723
   *                       </p>
2724
   * @param int   $options [optional] <p>
2725
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
2726
   *                       <b>JSON_HEX_TAG</b>,
2727
   *                       <b>JSON_HEX_AMP</b>,
2728
   *                       <b>JSON_HEX_APOS</b>,
2729
   *                       <b>JSON_NUMERIC_CHECK</b>,
2730
   *                       <b>JSON_PRETTY_PRINT</b>,
2731
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
2732
   *                       <b>JSON_FORCE_OBJECT</b>,
2733
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
2734
   *                       constants is described on
2735
   *                       the JSON constants page.
2736
   *                       </p>
2737
   * @param int   $depth   [optional] <p>
2738
   *                       Set the maximum depth. Must be greater than zero.
2739
   *                       </p>
2740
   *
2741
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
2742
   */
2743 2 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // Run the value through filter() before it is handed to the encoder.
    $filtered = self::filter($value);

    // The native $depth argument is only passed along on PHP >= 5.5.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
2755
2756
  /**
2757
   * Makes string's first char lowercase.
2758
   *
2759
   * @param string  $str       <p>The input string</p>
2760
   * @param string  $encoding  [optional] <p>Set the charset.</p>
2761
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
2762
   *
2763
   * @return string <p>The resulting string</p>
2764
   */
2765 7
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Everything after the first character; substr() may report false.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // The first character on its own, lower-cased.
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($head, $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
2780
2781
  /**
2782
   * alias for "UTF8::lcfirst()"
2783
   *
2784
   * @see UTF8::lcfirst()
2785
   *
2786
   * @param string  $word
2787
   * @param string  $encoding
2788
   * @param boolean $cleanUtf8
2789
   *
2790
   * @return string
2791
   */
2792 1
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: all arguments are forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
2796
2797
  /**
2798
   * Lowercase for all words in the string.
2799
   *
2800
   * @param string   $str        <p>The input string.</p>
2801
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
2802
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
2803
   * @param string   $encoding   [optional] <p>Set the charset.</p>
2804
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
2805
   *
2806
   * @return string
2807
   */
2808 1
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Test emptiness by length, not truthiness: the string "0" is real input.
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Empty chunks add nothing to the joined result; "0" must be kept.
      if ((string)$word === '') {
        continue;
      }

      // Lower-case the first character unless the word is explicitly excluded.
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The chunks are joined without a separator, so any separators must
    // already be part of what str_to_words() returned.
    return implode('', $newWords);
  }
2846
2847
  /**
2848
   * Strip whitespace or other characters from beginning of a UTF-8 string.
2849
   *
2850
   * @param string $str   <p>The string to be trimmed</p>
2851
   * @param string $chars <p>Optional characters to be stripped</p>
2852
   *
2853
   * @return string <p>The string with unwanted characters stripped from the left.</p>
2854
   */
2855 24 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no char list given" marker. Other empty values select the
    // default too - except the string "0", which is a legitimate char list
    // (strip leading zeros) and must not be mistaken for "not given".
    $useDefault = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefault) {
      // Default: strip leading separator (\pZ) and "other" (\pC) characters.
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
2870
2871
  /**
2872
   * Returns the UTF-8 character with the maximum code point in the given data.
2873
   *
2874
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
2875
   *
2876
   * @return string <p>The character with the highest code point.</p>
2877
   */
2878 1 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is examined as one concatenated string.
    $subject = is_array($arg) === true ? implode('', $arg) : $arg;

    // Highest code point found, converted back into a character.
    $highest = max(self::codepoints($subject));

    return self::chr($highest);
  }
2886
2887
  /**
2888
   * Calculates and returns the maximum number of bytes taken by any
2889
   * UTF-8 encoded character in the given string.
2890
   *
2891
   * @param string $str <p>The original Unicode string.</p>
2892
   *
2893
   * @return int <p>Max byte lengths of the given chars.</p>
2894
   */
2895 1
  public static function max_chr_width($str)
  {
    // Byte size of every character, as reported by chr_size_list().
    $sizes = self::chr_size_list($str);

    // No characters at all: the width is reported as 0.
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
2904
2905
  /**
2906
   * Checks whether mbstring is available on the server.
2907
   *
2908
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2909
   */
2910 12
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // Side effect: once the extension is known to exist, its internal
    // encoding is switched to UTF-8.
    \mb_internal_encoding('UTF-8');

    return true;
  }
2920
2921 1
  private static function mbstring_overloaded()
  {
    // Without the constant there is no overload flag to test against.
    if (!defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    // True when the string-function bit is set in "mbstring.func_overload".
    return (bool)(ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2933
2934
  /**
2935
   * Returns the UTF-8 character with the minimum code point in the given data.
2936
   *
2937
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
2938
   *
2939
   * @return string <p>The character with the lowest code point.</p>
2940
   */
2941 1 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is examined as one concatenated string.
    $subject = is_array($arg) === true ? implode('', $arg) : $arg;

    // Lowest code point found, converted back into a character.
    $lowest = min(self::codepoints($subject));

    return self::chr($lowest);
  }
2949
2950
  /**
2951
   * alias for "UTF8::normalize_encoding()"
2952
   *
2953
   * @see        UTF8::normalize_encoding()
2954
   *
2955
   * @param string $encoding
2956
   * @param mixed  $fallback
2957
   *
2958
   * @return string
2959
   *
2960
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
2961
   */
2962
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // Deprecated alias: both arguments are forwarded unchanged.
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2966
2967
  /**
2968
   * Normalize the encoding-"name" input.
2969
   *
2970
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
2971
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
2972
   *
2973
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
2974
   */
2975 80
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // Per-request memo: raw input name => normalized name.
    static $NORMALIZED = array();

    // Nothing usable given: hand back the caller's fallback as-is.
    if (!$encoding) {
      return $fallback;
    }

    // Fast path for the most common name.
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // Names already present in the encodings list are returned untouched.
    if (in_array($encoding, self::$ENCODINGS, true)) {
      return $encoding;
    }

    if (isset($NORMALIZED[$encoding])) {
      return $NORMALIZED[$encoding];
    }

    // Lookup key: upper-cased name with everything except letters, digits
    // and whitespace removed (e.g. "iso-8859-1" => "ISO88591").
    $upper = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $equivalences = array(
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // Known alias: use the canonical name. Unknown: keep the upper-cased input.
    $normalized = !empty($equivalences[$lookupKey]) ? $equivalences[$lookupKey] : $upper;

    // Memoize under the *original* spelling so repeated calls hit the cache.
    $NORMALIZED[$encoding] = $normalized;

    return $normalized;
  }
3077
3078
  /**
3079
   * Normalize some MS Word special characters.
3080
   *
3081
   * @param string $str <p>The string to be normalized.</p>
3082
   *
3083
   * @return string
3084
   */
3085 16 View Code Duplication
  public static function normalize_msword($str)
  {
    // Search / replacement lists, derived once from the UTF8_MSWORD table.
    static $SEARCH = null;
    static $REPLACE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($SEARCH === null) {
      // Load the mapping table on first use.
      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $SEARCH = array_keys(self::$UTF8_MSWORD);
      $REPLACE = array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($SEARCH, $REPLACE, $str);
  }
3109
3110
  /**
3111
   * Normalize the whitespace.
3112
   *
3113
   * @param string $str                     <p>The string to be normalized.</p>
3114
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3115
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3116
   *                                        bidirectional text chars.</p>
3117
   *
3118
   * @return string
3119
   */
3120 37
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // Whitespace lists keyed by (int)$keepNonBreakingSpace.
    static $WHITESPACE_LISTS = array();
    // Values of the bidi control table, extracted once.
    static $BIDI_LIST = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $listKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_LISTS[$listKey])) {
      $table = self::$WHITESPACE_TABLE;

      // Only a strict boolean true removes the non-breaking space from the list.
      if ($keepNonBreakingSpace === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_LISTS[$listKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_LIST === null) {
        $BIDI_LIST = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // Bidi control characters are removed outright, not replaced.
      $str = str_replace($BIDI_LIST, '', $str);
    }

    // Every listed whitespace variant becomes a plain ASCII space.
    return str_replace($WHITESPACE_LISTS[$listKey], ' ', $str);
  }
3155
3156
  /**
3157
   * Strip all whitespace characters. This includes tabs and newline
3158
   * characters, as well as multibyte whitespace such as the thin space
3159
   * and ideographic space.
3160
   *
3161
   * @param string $str
3162
   *
3163
   * @return string
3164
   */
3165 12
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // Remove every run matched by the POSIX [:space:] class in UTF-8 mode;
    // the cast turns a NULL (regex failure) into an empty string.
    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3175
3176
  /**
3177
   * Format a number with grouped thousands.
3178
   *
3179
   * @param float  $number
3180
   * @param int    $decimals
3181
   * @param string $dec_point
3182
   * @param string $thousands_sep
3183
   *
3184
   * @return string
3185
   *
3186
   * @deprecated <p>This has nothing to do with UTF-8.</p>
3187
   */
3188
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // A separator longer than one byte (e.g. a multi-byte UTF-8 character)
    // is only supported by the native function on newer PHP versions. Format
    // with the ASCII defaults and swap both separators in a single strtr()
    // pass, which works everywhere; unlike chained str_replace() calls, a
    // replacement that itself contains "." or "," is never replaced again.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    // Single-byte (or empty) separators: the native function is sufficient.
    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3214
3215
  /**
3216
   * Calculates Unicode code point of the given UTF-8 encoded character.
3217
   *
3218
   * INFO: opposite to UTF8::chr()
3219
   *
3220
   * @param string      $chr      <p>The character of which to calculate code point.<p/>
3221
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3222
   *
3223
   * @return int <p>
3224
   *             Unicode code point of the given character,<br>
3225
   *             0 on invalid UTF-8 byte sequence.
3226
   *             </p>
3227
   */
3228 23
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // Memo: (original input . resolved encoding) => code point.
    static $CODE_POINTS = array();

    $encoding = (string)$encoding;

    // The cache key is built from the input as given, before any conversion.
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // Still something other than UTF-8 after normalizing: convert the input.
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $memoKey = $input . $encoding;
    if (isset($CODE_POINTS[$memoKey]) === true) {
      return $CODE_POINTS[$memoKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer IntlChar when available; a falsy answer falls through to the
    // manual decoding below.
    if (self::$SUPPORT['intlChar'] === true) {
      $code = \IntlChar::ord($chr);
      if ($code) {
        return $CODE_POINTS[$memoKey] = $code;
      }
    }

    // Manual decoding from (at most) the first four bytes; unpack('C*')
    // yields a 1-based list of byte values.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // Single byte (or no input at all): the byte value itself.
      $codePoint = $lead;
    }

    $CODE_POINTS[$memoKey] = $codePoint;

    return $codePoint;
  }
3281
3282
  /**
   * Parses a query string into an array (written into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never creates variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p><strong>false</strong> if php can't parse the string or $result ends up empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success means: the parser did not fail AND something was actually extracted
    return $parsed !== false && !empty($result);
  }
3310
3311
  /**
   * Checks if the "/u" pattern modifier (UTF-8 mode of PCRE) is usable.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // probe with an empty pattern; warnings are silenced on purpose, only the result matters
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3321
3322
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits -> code point,
   * hexadecimal digits -> code point via UTF8::hex_to_int(), anything else -> UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty / resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // INFO: the ctype_* functions are always fed with a string, because a non-string argument
    //       is interpreted as an ASCII code (and is deprecated since PHP 8.1)
    if (ctype_digit((string)$var1)) {
      $start = (int)$var1;
    } elseif (ctype_xdigit((string)$var1)) {
      $start = (int)self::hex_to_int($var1);
    } else {
      $start = self::ord($var1);
    }

    if (!$start) {
      return array();
    }

    if (ctype_digit((string)$var2)) {
      $end = (int)$var2;
    } elseif (ctype_xdigit((string)$var2)) {
      $end = (int)self::hex_to_int($var2);
    } else {
      $end = self::ord($var2);
    }

    if (!$end) {
      return array();
    }

    // map every code point of the numeric range back to its character
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }
3368
3369
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html entities, so the entity decoder below handles them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode until a full pass no longer changes the string (or only once, if multi-decode is off)
    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $urlDecoded = rawurldecode($withoutEntities);

      $str = self::fix_simple_utf8($urlDecoded);
    } while ($multi_decode === true && $before !== $str);

    return (string)$str;
  }
3419
3420
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3435
3436
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM table is tested against the start of the string, in table order;
   * a matching BOM is cut off and the remaining entries are tested against the shortened string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // byte-wise comparison: the BOM must sit at offset 0
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $rest = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($rest === false) ? '' : (string)$rest;
    }

    return $str;
  }
3463
3464
  /**
   * Removes duplicate (directly consecutive) occurrences of a string in another string.
   *
   * e.g.: remove_duplicates('a  b   c') => 'a b c'
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or strings) to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // "\" and "$" must be escaped in the replacement, otherwise an item like "$0"
        // is interpreted as a back-reference and the repeated run is re-inserted unchanged
        $literalReplacement = strtr((string)$item, array('\\' => '\\\\', '$' => '\\$'));

        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', $literalReplacement, $str);
      }
    }

    return $str;
  }
3487
3488
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" - "%1F") of these characters.</p>
   * @param string $replacement <p>The string that is inserted instead of a removed character.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // INFO: case-insensitive, because hex digits in url-escapes may be upper-case ("%0B", "%1F")
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left, because a removal can create a new match (e.g. "%0%000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3521
3522
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement, an empty string removes the chars.</p>
   * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 byte sequences.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // INFO: "mb_substitute_character()" only accepts a code point or a keyword ("none", ...),
      //       never an arbitrary string. So invalid bytes are either dropped ("none") or turned
      //       into U+FFFD, which is swapped for the real replacement by the "str_replace()" below.
      $substitute = ($replacementChar === '') ? 'none' : 0xFFFD;

      // the substitute character is a global mbstring setting -> save & restore it
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    return str_replace(
        array(
            "\xEF\xBF\xBD",
            '�',
        ),
        array(
            $replacementChar,
            $replacementChar,
        ),
        $str
    );
  }
3567
3568
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. Without a char list,
   *                      unicode separators (\pZ) and "other" characters (\pC) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INFO: the string "0" is falsy in php, but it is a valid char list (like in the native "rtrim()")
    $hasCharList = ($chars !== INF && ($chars || $chars === '0'));

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($hasCharList === false) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3591
3592
  /**
   * Build (and cache) a regex fragment that matches any one of the characters of $s.
   *
   * The fragment is meant to be embedded in a "/"-delimited pattern, e.g. by the trim-helpers.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the character-class.</p>
   *
   * @return string <p>"[...]" or, if some elements can't live in a class, "(?:[...]|...)".</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $CACHE = array();

    $cacheKey = $s . $class;

    if (isset($CACHE[$cacheKey])) {
      return $CACHE[$cacheKey];
    }

    // slot 0 collects the content of the "[...]" class, every further slot is an alternative
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the very front of the class
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes long: quote it for the pattern
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // longer than two bytes, but still one single character
        $parts[0] .= $char;
      } else {
        // more than one character: can only be matched as an alternative
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $CACHE[$cacheKey] = $pattern;

    return $pattern;
  }
3640
3641
  /**
   * WARNING: Prints the detected native UTF-8 support (libs) as html, e.g. for debugging.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // one "key - value" line per support-flag
    $lines = '';
    foreach (self::$SUPPORT as $key => $value) {
      $lines .= $key . ' - ' . print_r($value, true) . "\n<br>";
    }

    echo '<pre>' . $lines . '</pre>';
  }
3656
3657
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for an empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if (!isset($char[0])) {
      return '';
    }

    // ASCII input is handed back untouched, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3688
3689
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    $chars = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // fast path: let PCRE (UTF-8 mode) cut the string into characters
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk over the bytes and collect well-formed sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the byte length is needed, also if "strlen()" is overloaded by mbstring
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteCount = \mb_strlen($str, '8BIT');
      } else {
        $byteCount = strlen($str);
      }

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {

        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $chars[] = $lead;

        } elseif (
            isset($str[$pos + 1])
            &&
            ($lead & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: needs one continuation byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

        } elseif (
            isset($str[$pos + 2])
            &&
            ($lead & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: needs two continuation bytes
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

        } elseif (
            isset($str[$pos + 3])
            &&
            ($lead & "\xF8") === "\xF0"
        ) {

          // 11110xxx: needs three continuation bytes
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }

        }
        // INFO: every other byte is skipped
      }
    }

    // glue the single characters together to chunks of "$length" characters
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3812
3813
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (only for binary input), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed detect-order and finally a round-trip via "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // INFO: every detector runs only once, its result is checked for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // INFO: the encoding-list is loaded data, so make sure it is really traversable
    if (is_array(self::$ENCODINGS) === true) {
      $md5 = md5($str);
      foreach (self::$ENCODINGS as $encodingTmp) {
        # INFO: //IGNORE and //TRANSLIT still throw notice
        // an encoding is accepted, if converting the string to itself keeps every byte
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
          return $encodingTmp;
        }
      }
    }

    return false;
  }
3917
3918
  /**
   * Check if the string ends with the given substring (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> also if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // compare the tail of the haystack (same length as the needle) with the needle
    $tail = substr($haystack, -strlen($needle));

    return $tail === $needle;
  }
3941
3942
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> also if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the tail is cut by characters (not by bytes), because the upper- and the lower-case
    //       variant of a character can differ in byte length and a byte-wise cut could then
    //       start in the middle of a multi-byte character
    $needleLength = (int)self::strlen($needle);
    if ($needleLength === 0) {
      return false;
    }

    $tail = (string)self::substr($haystack, -$needleLength);

    return self::strcasecmp($tail, $needle) === 0;
  }
3965
3966
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), inserted literally
   *                       (no back-references, like in the native function).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $search = (array)$search;

    // turn every needle into a case-insensitive UTF-8 pattern
    /** @noinspection AlterInForeachInspection */
    foreach ($search as &$s) {
      $s = (string)$s;
      if ('' === $s) {
        // an empty needle must never match: this pattern can't match anything
        $s = '/^(?<=.)$/';
      } else {
        $s = '/' . preg_quote($s, '/') . '/ui';
      }
    }
    unset($s); // break the reference of the loop variable

    // INFO: "\" and "$" are escaped, otherwise "preg_replace()" would interpret
    //       things like "$1" or "\0" in the replacement as back-references
    $escape = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace) === true) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escape);
      }
    } else {
      $replacement = strtr((string)$replace, $escape);
    }

    $subject = preg_replace($search, $replacement, $subject, -1, $replacements);
    $count = $replacements; // used as reference parameter

    return $subject;
  }
4009
4010
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> also if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the haystack starts with the needle, if the first case-insensitive hit is at offset 0
    return self::stripos($haystack, $needle) === 0;
  }
4033
4034
  /**
   * Limit the number of characters in a string, without cutting the last word in the middle.
   *
   * A string that is longer than "$length" is shortened and "$strAddOn" is appended;
   * a word that doesn't fit completely into the limit is dropped.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximal number of characters, before the add-on.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a word: cut in front of the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit and drop the last (maybe incomplete) word
    $cut = (string)self::substr($str, 0, $length);
    $words = explode(' ', $cut);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // there was only one word: fall back to a hard cut
    if ($withoutLastWord === '') {
      return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4074
4075
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <p>
   *                The padded string; the input is returned unchanged, if "$pad_length" is not
   *                a positive integer, is shorter than the input or if "$pad_string" is empty.
   *                </p>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // INFO: without a pad-string there is nothing to pad with (and we would divide by zero below)
      if (!$ps_length) {
        return $str;
      }

      // number of characters that are missing
      $diff = $pad_length - $str_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the smaller half, the right side gets the bigger half
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4129
4130
  /**
   * Repeat a string.
   *
   * INFO: the input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);

    return str_repeat($filtered, $multiplier);
  }
4154
4155
  /**
4156
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
4157
   *
4158
   * Replace all occurrences of the search string with the replacement string
4159
   *
4160
   * @link http://php.net/manual/en/function.str-replace.php
4161
   *
4162
   * @param mixed $search  <p>
4163
   *                       The value being searched for, otherwise known as the needle.
4164
   *                       An array may be used to designate multiple needles.
4165
   *                       </p>
4166
   * @param mixed $replace <p>
4167
   *                       The replacement value that replaces found search
4168
   *                       values. An array may be used to designate multiple replacements.
4169
   *                       </p>
4170
   * @param mixed $subject <p>
4171
   *                       The string or array being searched and replaced on,
4172
   *                       otherwise known as the haystack.
4173
   *                       </p>
4174
   *                       <p>
4175
   *                       If subject is an array, then the search and
4176
   *                       replace is performed with every entry of
4177
   *                       subject, and the return value is an array as
4178
   *                       well.
4179
   *                       </p>
4180
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4181
   *
4182
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4183
   */
4184 12
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Plain pass-through to the native function. $count is received by
    // reference so the number of replacements reaches the caller.
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4188
4189
  /**
4190
   * Replace the first "$search"-term with the "$replace"-term.
4191
   *
4192
   * @param string $search
4193
   * @param string $replace
4194
   * @param string $subject
4195
   *
4196
   * @return string
4197
   */
4198 1
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    // No occurrence at all: hand the subject back untouched.
    if ($firstHit === false) {
      return $subject;
    }

    // Replace exactly the span covered by the first occurrence.
    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstHit, $searchLength);
  }
4208
4209
  /**
4210
   * Shuffles all the characters in the string.
4211
   *
4212
   * @param string $str <p>The input string</p>
4213
   *
4214
   * @return string <p>The shuffled string.</p>
4215
   */
4216 1
  public static function str_shuffle($str)
  {
    // Break the string into pieces via self::split(), randomise their
    // order in place and glue them back together.
    $pieces = self::split($str);
    shuffle($pieces);
    $shuffled = implode('', $pieces);

    return $shuffled;
  }
4224
4225
  /**
4226
   * Sort all characters according to code points.
4227
   *
4228
   * @param string $str    <p>A UTF-8 string.</p>
4229
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4230
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4231
   *
4232
   * @return string <p>String of sorted characters.</p>
4233
   */
4234 1
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // Flipping twice collapses duplicate values, because a value can only
    // exist once as an array key.
    if ($unique) {
      $codePoints = array_flip(array_flip($codePoints));
    }

    // asort()/arsort() reorder the elements by value; self::string() then
    // consumes them in that order.
    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4250
4251
  /**
4252
   * Split a string into an array.
4253
   *
4254
   * @param string $str
4255
   * @param int    $len
4256
   *
4257
   * @return array
4258
   */
4259 23
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return array();
    }

    $len = (int)$len;
    if ($len < 1) {
      // Non-positive chunk length: defer to the native function so that
      // its own error handling applies.
      return str_split($str, $len);
    }

    // Cut the string into grapheme clusters using the class-wide pattern.
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Join every run of $len consecutive clusters into one output element;
    // the last element may be shorter.
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4295
4296
  /**
4297
   * Check if the string starts with the given substring.
4298
   *
4299
   * @param string $haystack <p>The string to search in.</p>
4300
   * @param string $needle   <p>The substring to search for.</p>
4301
   *
4302
   * @return bool
4303
   */
4304 2 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    $bothNonEmpty = isset($haystack[0], $needle[0]);

    // A match position of 0 means the needle sits at the very beginning.
    return $bothNonEmpty && strpos($haystack, $needle) === 0;
  }
4319
4320
  /**
4321
   * Get a binary representation of a specific string.
4322
   *
4323
   * @param string $str <p>The input string.</p>
4324
   *
4325
   * @return string
4326
   */
4327 1
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Same result the previous implementation produced for empty input.
    if (!isset($str[0])) {
      return '0';
    }

    // Convert byte by byte. Feeding the whole hex dump into base_convert()
    // goes through a numeric type and silently loses precision for longer
    // strings, which produced wrong bits.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // Keep the established output format: no leading zeros, and "0" when
    // every bit is zero.
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4335
4336
  /**
4337
   * Convert a string into an array of words.
4338
   *
4339
   * @param string   $str
4340
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
4341
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
4342
   * @param null|int $removeShortValues
4343
   *
4344
   * @return array
4345
   */
4346 10
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    // A non-null $removeShortValues switches the length filter on.
    $filterByLength = $removeShortValues !== null;
    if ($filterByLength) {
      $removeShortValues = (int)$removeShortValues;
    }

    if (!isset($str[0])) {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    // The word pattern is the (captured) delimiter, so the result alternates
    // between the text around the words and the words themselves.
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    if (!$filterByLength && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = array();
    foreach ($parts as $part) {
      // Drop parts whose length does not exceed the configured minimum.
      $dropAsShort = $filterByLength && self::strlen($part) <= $removeShortValues;

      // Drop parts that contain nothing but whitespace.
      $dropAsBlank = !$dropAsShort && $removeEmptyValues === true && trim($part) === '';

      if (!$dropAsShort && !$dropAsBlank) {
        $kept[] = $part;
      }
    }

    return $kept;
  }
4397
4398
  /**
4399
   * alias for "UTF8::to_ascii()"
4400
   *
4401
   * @see UTF8::to_ascii()
4402
   *
4403
   * @param string $str
4404
   * @param string $unknown
4405
   * @param bool   $strict
4406
   *
4407
   * @return string
4408
   */
4409 7
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // Alternative name only: all arguments are forwarded unchanged.
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4413
4414
  /**
4415
   * Counts number of words in the UTF-8 string.
4416
   *
4417
   * @param string $str      <p>The input string.</p>
4418
   * @param int    $format   [optional] <p>
4419
   *                         <strong>0</strong> => return a number of words (default)<br>
4420
   *                         <strong>1</strong> => return an array of words<br>
4421
   *                         <strong>2</strong> => return an array of words with word-offset as key
4422
   *                         </p>
4423
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
4424
   *
4425
   * @return array|int <p>The number of words in the string</p>
4426
   */
4427 1
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // str_to_words() returns alternating pieces: even indexes hold the text
    // between words, odd indexes hold the words.
    $pieces = self::str_to_words($str, $charlist);
    $pieceCount = count($pieces);

    // Any format other than 1 or 2: just the number of words.
    if ($format !== 1 && $format !== 2) {
      return ($pieceCount - 1) / 2;
    }

    $words = array();

    // Format 1: plain list of words.
    if ($format === 1) {
      for ($i = 1; $i < $pieceCount; $i += 2) {
        $words[] = $pieces[$i];
      }

      return $words;
    }

    // Format 2: words keyed by their character offset in the input. The
    // offset advances by the word itself plus the separator that follows it.
    $offset = self::strlen($pieces[0]);
    for ($i = 1; $i < $pieceCount; $i += 2) {
      $word = $pieces[$i];
      $words[$offset] = $word;
      $offset += self::strlen($word) + self::strlen($pieces[$i + 1]);
    }

    return $words;
  }
4457
4458
  /**
4459
   * Case-insensitive string comparison.
4460
   *
4461
   * INFO: Case-insensitive version of UTF8::strcmp()
4462
   *
4463
   * @param string $str1
4464
   * @param string $str2
4465
   *
4466
   * @return int <p>
4467
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
4468
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
4469
   *             <strong>0</strong> if they are equal.
4470
   *             </p>
4471
   */
4472 11
  public static function strcasecmp($str1, $str2)
  {
    // Case-fold both sides, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4476
4477
  /**
4478
   * alias for "UTF8::strstr()"
4479
   *
4480
   * @see UTF8::strstr()
4481
   *
4482
   * @param string  $haystack
4483
   * @param string  $needle
4484
   * @param bool    $before_needle
4485
   * @param string  $encoding
4486
   * @param boolean $cleanUtf8
4487
   *
4488
   * @return string|false
4489
   */
4490 1
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Alternative name only: every argument goes straight to strstr().
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4494
4495
  /**
4496
   * Case-sensitive string comparison.
4497
   *
4498
   * @param string $str1
4499
   * @param string $str2
4500
   *
4501
   * @return int  <p>
4502
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
4503
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
4504
   *              <strong>0</strong> if they are equal.
4505
   *              </p>
4506
   */
4507 14
  public static function strcmp($str1, $str2)
  {
    // Inputs that are identical as strings are equal; no normalization needed.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the NFD-normalized forms with the native strcmp().
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($decomposed1, $decomposed2);
  }
4515
4516
  /**
4517
   * Find length of initial segment not matching mask.
4518
   *
4519
   * @param string $str
4520
   * @param string $charList
4521
   * @param int    $offset
4522
   * @param int    $length
4523
   *
4524
   * @return int|null
4525
   */
4526 15
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    // Without any mask characters there is nothing to measure against.
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // Narrow the subject down to the requested window first.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return null;
    }

    // Lazily capture everything in front of the first mask character; the
    // length of that capture is the result.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    // No mask character present: the whole string counts.
    return self::strlen($str);
  }
4552
4553
  /**
4554
   * alias for "UTF8::stristr()"
4555
   *
4556
   * @see UTF8::stristr()
4557
   *
4558
   * @param string  $haystack
4559
   * @param string  $needle
4560
   * @param bool    $before_needle
4561
   * @param string  $encoding
4562
   * @param boolean $cleanUtf8
4563
   *
4564
   * @return string|false
4565
   */
4566 1
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Alternative name only: every argument goes straight to stristr().
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4570
4571
  /**
4572
   * Create a UTF-8 string from code points.
4573
   *
4574
   * INFO: opposite to UTF8::codepoints()
4575
   *
4576
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4577
   *
4578
   * @return string <p>UTF-8 encoded string.</p>
4579
   */
4580 2
  public static function string(array $array)
  {
    // Convert each code point with self::chr() and append the results in
    // the array's iteration order.
    $utf8 = '';
    foreach ($array as $codePoint) {
      $utf8 .= self::chr($codePoint);
    }

    return $utf8;
  }
4593
4594
  /**
4595
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4596
   *
4597
   * @param string $str <p>The input string.</p>
4598
   *
4599
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4600
   */
4601 3
  public static function string_has_bom($str)
  {
    $hasBom = false;

    // The keys of self::$BOM are the BOM byte sequences; the stored byte
    // lengths are not needed for this check.
    foreach (self::$BOM as $bom => $ignoredByteLength) {
      if (strpos($str, $bom) === 0) {
        $hasBom = true;
        break;
      }
    }

    return $hasBom;
  }
4611
4612
  /**
4613
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4614
   *
4615
   * @link http://php.net/manual/en/function.strip-tags.php
4616
   *
4617
   * @param string  $str            <p>
4618
   *                                The input string.
4619
   *                                </p>
4620
   * @param string  $allowable_tags [optional] <p>
4621
   *                                You can use the optional second parameter to specify tags which should
4622
   *                                not be stripped.
4623
   *                                </p>
4624
   *                                <p>
4625
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
4626
   *                                can not be changed with allowable_tags.
4627
   *                                </p>
4628
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
4629
   *
4630
   * @return string <p>The stripped string.</p>
4631
   */
4632 2 View Code Duplication
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Optionally clean the input via self::clean() before stripping tags.
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4646
4647
  /**
4648
   * Finds position of first occurrence of a string within another, case insensitive.
4649
   *
4650
   * @link http://php.net/manual/en/function.mb-stripos.php
4651
   *
4652
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
4653
   * @param string  $needle    <p>The string to find in haystack.</p>
4654
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
4655
   * @param string  $encoding  [optional] <p>Set the charset.</p>
4656
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4657
   *
4658
   * @return int|false <p>
4659
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
4660
   *                   or false if needle is not found.
4661
   *                   </p>
4662
   */
4663 10
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // An empty haystack or an empty needle can never produce a position.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $useDefaultEncoding = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $useDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $canUseIntl = $encoding === 'UTF-8'
                  && self::$SUPPORT['intl'] === true
                  && Bootup::is_php('5.4') === true;

    if (!$canUseIntl) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
4707
4708
  /**
4709
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4710
   *
4711
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4712
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4713
   * @param bool    $before_needle [optional] <p>
4714
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4715
   *                               haystack before the first occurrence of the needle (excluding the needle).
4716
   *                               </p>
4717
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4718
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
4719
   *
4720
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
4721
   */
4722 17
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    // An empty haystack or an empty needle can never match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Only a needle that is really empty at this point (e.g. nothing was
    // left after cleaning) short-circuits to the haystack. A truthiness
    // check would also treat the valid needle "0" as empty.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Warn (but continue) when a non-UTF-8 encoding was requested and
    // mbstring is not available.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Preferred implementation: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // For pure ASCII input the native function is used.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // Everything from the needle onwards = haystack minus the captured prefix.
    return self::substr($haystack, self::strlen($match[1]));
  }
4789
4790
  /**
4791
   * Get the string length, not the byte-length!
4792
   *
4793
   * @link     http://php.net/manual/en/function.mb-strlen.php
4794
   *
4795
   * @param string  $str       <p>The string being checked for length.</p>
4796
   * @param string  $encoding  [optional] <p>Set the charset.</p>
4797
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4798
   *
4799
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4800
   *             character counted as +1)</p>
4801
   */
4802 88
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // These encodings are answered right away. The comparison is loose on
    // purpose so it behaves exactly like a "switch" on $encoding.
    if ($encoding == 'ASCII' || $encoding == 'CP850' || $encoding == '8BIT') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    $mbstringState = self::$SUPPORT['mbstring'];
    $iconvState = self::$SUPPORT['iconv'];

    // Warn (but continue) when neither extension can deal with the
    // requested non-UTF-8 encoding.
    if ($encoding !== 'UTF-8' && $mbstringState === false && $iconvState === false) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // mbstring wins whenever it is available, iconv is the next choice.
    if ($mbstringState === true) {
      return \mb_strlen($str, $encoding);
    }

    if ($iconvState === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the characters matched one by one
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4897
4898
  /**
4899
   * Case insensitive string comparisons using a "natural order" algorithm.
4900
   *
4901
   * INFO: natural order version of UTF8::strcasecmp()
4902
   *
4903
   * @param string $str1 <p>The first string.</p>
4904
   * @param string $str2 <p>The second string.</p>
4905
   *
4906
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
4907
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
4908
   *             <strong>0</strong> if they are equal
4909
   */
4910 1
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both sides, then run the natural-order comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4914
4915
  /**
4916
   * String comparisons using a "natural order" algorithm
4917
   *
4918
   * INFO: natural order version of UTF8::strcmp()
4919
   *
4920
   * @link  http://php.net/manual/en/function.strnatcmp.php
4921
   *
4922
   * @param string $str1 <p>The first string.</p>
4923
   * @param string $str2 <p>The second string.</p>
4924
   *
4925
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
4926
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
4927
   *             <strong>0</strong> if they are equal
4928
   */
4929 2
  public static function strnatcmp($str1, $str2)
  {
    // Inputs that are identical as strings are equal right away.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise let the native natural-order comparison decide on the
    // strtonatfold()-ed versions of both inputs.
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4933
4934
  /**
4935
   * Case-insensitive string comparison of the first n characters.
4936
   *
4937
   * @link  http://php.net/manual/en/function.strncasecmp.php
4938
   *
4939
   * @param string $str1 <p>The first string.</p>
4940
   * @param string $str2 <p>The second string.</p>
4941
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4942
   *
4943
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4944
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4945
   *             <strong>0</strong> if they are equal
4946
   */
4947 1
  public static function strncasecmp($str1, $str2, $len)
  {
    // Case-fold both sides, then compare only their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4951
4952
  /**
4953
   * String comparison of the first n characters.
4954
   *
4955
   * @link  http://php.net/manual/en/function.strncmp.php
4956
   *
4957
   * @param string $str1 <p>The first string.</p>
4958
   * @param string $str2 <p>The second string.</p>
4959
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4960
   *
4961
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4962
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4963
   *             <strong>0</strong> if they are equal
4964
   */
4965 2
  public static function strncmp($str1, $str2, $len)
  {
    // Cut both inputs down to their first $len characters ...
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    // ... and compare what is left.
    return self::strcmp($head1, $head2);
  }
4972
4973
  /**
4974
   * Search a string for any of a set of characters.
4975
   *
4976
   * @link  http://php.net/manual/en/function.strpbrk.php
4977
   *
4978
   * @param string $haystack  <p>The string where char_list is looked for.</p>
4979
   * @param string $char_list <p>This parameter is case sensitive.</p>
4980
   *
4981
   * @return string|false String starting from the character found, or false if it is not found.
4982
   */
4983 1
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // An empty haystack or an empty character list can never match.
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // Find the first character of the haystack that belongs to the list.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // Return the haystack from that character onwards.
    $hitPos = strpos($haystack, $hit[0]);

    return substr($haystack, $hitPos);
  }
4998
4999
  /**
5000
   * Find position of first occurrence of string in a string.
5001
   *
5002
   * @link http://php.net/manual/en/function.mb-strpos.php
5003
   *
5004
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5005
   * @param string  $needle    <p>The string to find in haystack.</p>
5006
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5007
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5008
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5009
   *
5010
   * @return int|false <p>
5011
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
5012
   *                   If needle is not found it returns false.
5013
   *                   </p>
5014
   */
5015 56
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
5016
  {
5017 56
    $haystack = (string)$haystack;
5018 56
    $needle = (string)$needle;
5019
5020 56
    if (!isset($haystack[0], $needle[0])) {
5021 3
      return false;
5022
    }
5023
5024
    // init
5025 55
    $offset = (int)$offset;
5026
5027
    // iconv and mbstring do not support integer $needle
5028
5029 55
    if ((int)$needle === $needle && $needle >= 0) {
0 ignored issues
show
Unused Code Bug introduced by
The strict comparison === seems to always evaluate to false as the types of (int) $needle (integer) and $needle (string) can never be identical. Maybe you want to use a loose comparison == instead?
Loading history...
5030
      $needle = (string)self::chr($needle);
5031
    }
5032
5033 55
    if ($cleanUtf8 === true) {
5034
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5035
      // if invalid characters are found in $haystack before $needle
5036 2
      $needle = self::clean($needle);
5037 2
      $haystack = self::clean($haystack);
5038 2
    }
5039
5040 View Code Duplication
    if (
5041
        $encoding === 'UTF-8'
5042 55
        ||
5043 42
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
5044 55
    ) {
5045 15
      $encoding = 'UTF-8';
5046 15
    } else {
5047 41
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
5048
    }
5049
5050 55
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
5051
      self::checkForSupport();
5052
    }
5053
5054
    if (
5055
        $encoding === 'CP850'
5056 55
        &&
5057 41
        self::$SUPPORT['mbstring_func_overload'] === false
5058 55
    ) {
5059 41
      return strpos($haystack, $needle, $offset);
5060
    }
5061
5062 View Code Duplication
    if (
5063
        $encoding !== 'UTF-8'
0 ignored issues
show
Comprehensibility introduced by
Consider adding parentheses for clarity. Current Interpretation: ($encoding !== 'UTF-8') ...PPORT['iconv'] === true, Probably Intended Meaning: $encoding !== ('UTF-8' &...PORT['iconv'] === true)

When comparing the result of a bit operation, we suggest to add explicit parenthesis and not to rely on PHP’s built-in operator precedence to ensure the code behaves as intended and to make it more readable.

Let’s take a look at these examples:

// Returns always int(0).
return 0 === $foo & 4;
return (0 === $foo) & 4;

// More likely intended return: true/false
return 0 === ($foo & 4);
Loading history...
5064 15
        &
5065 15
        self::$SUPPORT['iconv'] === true
5066 15
        &&
5067 1
        self::$SUPPORT['mbstring'] === false
5068 15
    ) {
5069
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
5070
    }
5071
5072
    if (
5073
        $offset >= 0 // iconv_strpos() can't handle negative offset
5074 15
        &&
5075
        $encoding !== 'UTF-8'
5076 15
        &&
5077 1
        self::$SUPPORT['mbstring'] === false
5078 15
        &&
5079
        self::$SUPPORT['iconv'] === true
5080 15
    ) {
5081
      // ignore invalid negative offset to keep compatibility
5082
      // with php < 5.5.35, < 5.6.21, < 7.0.6
5083
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
5084
    }
5085
5086 15
    if (self::$SUPPORT['mbstring'] === true) {
5087 15
      return \mb_strpos($haystack, $needle, $offset, $encoding);
5088
    }
5089
5090
    if (
5091
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
5092 1
        &&
5093 1
        self::$SUPPORT['intl'] === true
5094 1
        &&
5095
        Bootup::is_php('5.4') === true
5096 1
    ) {
5097
      return \grapheme_strpos($haystack, $needle, $offset);
5098
    }
5099
5100
    if (
5101
        $offset >= 0 // iconv_strpos() can't handle negative offset
5102 1
        &&
5103 1
        self::$SUPPORT['iconv'] === true
5104 1
    ) {
5105
      // ignore invalid negative offset to keep compatibility
5106
      // with php < 5.5.35, < 5.6.21, < 7.0.6
5107
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
5108
    }
5109
5110 1
    $haystackIsAscii = self::is_ascii($haystack);
5111 1
    if ($haystackIsAscii && self::is_ascii($needle)) {
5112 1
      return strpos($haystack, $needle, $offset);
5113
    }
5114
5115
    // fallback via vanilla php
5116
5117 1
    if ($haystackIsAscii) {
5118
      $haystackTmp = substr($haystack, $offset);
5119
    } else {
5120 1
      $haystackTmp = self::substr($haystack, $offset);
5121
    }
5122 1
    if ($haystackTmp === false) {
5123
      $haystackTmp = '';
5124
    }
5125 1
    $haystack = (string)$haystackTmp;
5126
5127 1
    if ($offset < 0) {
5128
      $offset = 0;
5129
    }
5130
5131 1
    $pos = strpos($haystack, $needle);
5132 1
    if ($pos === false) {
5133
      return false;
5134
    }
5135
5136 1
    $returnTmp = $offset + self::strlen(substr($haystack, 0, $pos));
5137 1
    if ($returnTmp !== false) {
5138 1
      return $returnTmp;
5139
    }
5140
5141
    // fallback to "mb_"-function via polyfill
5142
    return \mb_strpos($haystack, $needle, $offset, $encoding);
5143
  }
5144
5145
  /**
5146
   * Finds the last occurrence of a character in a string within another.
5147
   *
5148
   * @link http://php.net/manual/en/function.mb-strrchr.php
5149
   *
5150
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5151
   * @param string $needle        <p>The string to find in haystack</p>
5152
   * @param bool   $before_needle [optional] <p>
5153
   *                              Determines which portion of haystack
5154
   *                              this function returns.
5155
   *                              If set to true, it returns all of haystack
5156
   *                              from the beginning to the last occurrence of needle.
5157
   *                              If set to false, it returns all of haystack
5158
   *                              from the last occurrence of needle to the end,
5159
   *                              </p>
5160
   * @param string $encoding      [optional] <p>
5161
   *                              Character encoding name to use.
5162
   *                              If it is omitted, internal character encoding is used.
5163
   *                              </p>
5164
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5165
   *
5166
   * @return string|false The portion of haystack or false if needle is not found.
5167
   */
5168 1 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // every encoding name except the literal 'UTF-8' goes through normalize_encoding()
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // optionally run both inputs through self::clean() before searching
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5184
5185
  /**
5186
   * Reverses characters order in the string.
5187
   *
5188
   * @param string $str The input string
5189
   *
5190
   * @return string The string with characters in the reverse sequence
5191
   */
5192 4
  public static function strrev($str)
  {
    $input = (string)$str;

    // an empty string has nothing to reverse
    if ($input === '') {
      return '';
    }

    // split via self::split(), flip the order of the pieces, glue them back together
    $pieces = self::split($input);
    $reversed = array_reverse($pieces);

    return implode('', $reversed);
  }
5202
5203
  /**
5204
   * Finds the last occurrence of a character in a string within another, case insensitive.
5205
   *
5206
   * @link http://php.net/manual/en/function.mb-strrichr.php
5207
   *
5208
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5209
   * @param string  $needle        <p>The string to find in haystack.</p>
5210
   * @param bool    $before_needle [optional] <p>
5211
   *                               Determines which portion of haystack
5212
   *                               this function returns.
5213
   *                               If set to true, it returns all of haystack
5214
   *                               from the beginning to the last occurrence of needle.
5215
   *                               If set to false, it returns all of haystack
5216
   *                               from the last occurrence of needle to the end,
5217
   *                               </p>
5218
   * @param string  $encoding      [optional] <p>
5219
   *                               Character encoding name to use.
5220
   *                               If it is omitted, internal character encoding is used.
5221
   *                               </p>
5222
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5223
   *
5224
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
5225
   */
5226 1 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // every encoding name except the literal 'UTF-8' goes through normalize_encoding()
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // optionally run both inputs through self::clean() before searching
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the case-insensitive search itself is delegated to "\mb_strrichr"
    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5241
5242
  /**
5243
   * Find position of last occurrence of a case-insensitive string.
5244
   *
5245
   * @param string  $haystack  <p>The string to look in.</p>
5246
   * @param string  $needle    <p>The string to look for.</p>
5247
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5248
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5249
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5250
   *
5251
   * @return int|false <p>
5252
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
5253
   *                   not found, it returns false.
5254
   *                   </p>
5255
   */
5256 1
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted to a string via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // nothing can be found in (or with) an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $shouldClean = ($cleanUtf8 === true || $encoding === true);
    if ($shouldClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $isDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred path: mbstring is flagged as available
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // only warn when mbstring is explicitly flagged as missing
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides, then search case-sensitively
    $haystackUpper = self::strtoupper($haystack);
    $needleUpper = self::strtoupper($needle);

    return self::strrpos($haystackUpper, $needleUpper, $offset, $encoding, $cleanUtf8);
  }
5322
5323
  /**
5324
   * Find position of last occurrence of a string in a string.
5325
   *
5326
   * @link http://php.net/manual/en/function.mb-strrpos.php
5327
   *
5328
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5329
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
5330
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5331
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5332
   *                              the end of the string.
5333
   *                              </p>
5334
   * @param string     $encoding  [optional] <p>Set the charset.</p>
5335
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5336
   *
5337
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
5338
   *                   is not found, it returns false.</p>
5339
   */
5340 10
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted to a string via self::chr()
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // nothing can be found in (or with) an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $shouldClean = ($cleanUtf8 === true || $encoding === true);
    if ($shouldClean) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $isDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred path: mbstring is flagged as available
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // only warn when mbstring is explicitly flagged as missing
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset !== 0) {
      if ($offset > 0) {
        // search only the part of the haystack that starts at $offset
        $window = self::substr($haystack, $offset);
      } else {
        // negative offset: search only the part that ends $offset characters before the end
        $window = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($window !== null) {
        $haystack = ($window === false) ? '' : (string)$window;
      }
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position
    // by measuring the length of everything in front of the match
    $prefix = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($prefix);
  }
5425
5426
  /**
5427
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5428
   * mask.
5429
   *
5430
   * @param string $str    <p>The input string.</p>
5431
   * @param string $mask   <p>The mask of chars</p>
5432
   * @param int    $offset [optional]
5433
   * @param int    $length [optional]
5434
   *
5435
   * @return int
5436
   */
5437 10
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // restrict the subject to the requested slice first
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    $str = (string)$str;
    if ($str === '' || !isset($mask[0])) {
      return 0;
    }

    // match the longest leading run of characters taken from the class built by self::rxClass()
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    $matches = array();
    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5454
5455
  /**
5456
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5457
   *
5458
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5459
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5460
   * @param bool    $before_needle [optional] <p>
5461
   *                               If <b>TRUE</b>, strstr() returns the part of the
5462
   *                               haystack before the first occurrence of the needle (excluding the needle).
5463
   *                               </p>
5464
   * @param string  $encoding      [optional] <p>Set the charset.</p>
5465
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5466
   *
5467
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
5468
   */
5469 2
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in (or with) an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // optionally run both inputs through self::clean() before searching
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // every encoding name except the literal 'UTF-8' goes through normalize_encoding()
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred path: mbstring is flagged as available
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // only warn when mbstring is explicitly flagged as missing
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // regex fallback: lazily capture everything in front of the first occurrence of the needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $before = $found[1];
    if ($before_needle) {
      return $before;
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($before));
  }
5527
5528
  /**
5529
   * Unicode transformation for case-less matching.
5530
   *
5531
   * @link http://unicode.org/reports/tr21/tr21-5.html
5532
   *
5533
   * @param string  $str       <p>The input string.</p>
5534
   * @param bool    $full      [optional] <p>
5535
   *                           <b>true</b>, replace full case folding chars (default)<br>
5536
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5537
   *                           </p>
5538
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5539
   *
5540
   * @return string
5541
   */
5542 13
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // search / replace lists derived once from self::$COMMON_CASE_FOLD and kept for later calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the "caseFolding_full" data is loaded once via self::getData() and kept for later calls
      static $fullFoldTable = null;
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5578
5579
  /**
5580
   * Make a string lowercase.
5581
   *
5582
   * @link http://php.net/manual/en/function.mb-strtolower.php
5583
   *
5584
   * @param string      $str       <p>The string being lowercased.</p>
5585
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5586
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5587
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5588
   *
5589
   * @return string str with all alphabetic characters converted to lowercase.
5590
   */
5591 25 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // optionally run the input through self::clean() first
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // every encoding name except the literal 'UTF-8' goes through normalize_encoding()
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // without a language hint the conversion is done by "\mb_strtolower"
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the "lang"-parameter needs intl + PHP >= 5.4, otherwise warn and use "\mb_strtolower"
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // use the language-specific transliterator id if it is listed, else "Any-Lower"
    $langCode = $lang . '-Lower';
    if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
5636
5637
  /**
5638
   * Generic case sensitive transformation for collation matching.
5639
   *
5640
   * @param string $str <p>The input string</p>
5641
   *
5642
   * @return string
5643
   */
5644 3
  private static function strtonatfold($str)
  {
    // decompose first (NFD) ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... then drop every run of non-spacing marks (\p{Mn})
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5649
5650
  /**
5651
   * Make a string uppercase.
5652
   *
5653
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5654
   *
5655
   * @param string      $str       <p>The string being uppercased.</p>
5656
   * @param string      $encoding  [optional] <p>Set the charset.</p>
5657
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5658
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5659
   *
5660
   * @return string str with all alphabetic characters converted to uppercase.
5661
   */
5662 19 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // optionally run the input through self::clean() before converting it
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // A language hint is only honoured with intl + PHP >= 5.4; in every other
    // case the conversion falls through to "\mb_strtoupper" at the end.
    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // use the language-specific transliterator id if it is listed, else "Any-Upper"
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // FIX: this warning used to name "UTF8::strtolower()" (copy & paste from the sibling method)
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5706
5707
  /**
5708
   * Translate characters or replace sub-strings.
5709
   *
5710
   * @link  http://php.net/manual/en/function.strtr.php
5711
   *
5712
   * @param string          $str  <p>The string being translated.</p>
5713
   * @param string|string[] $from <p>The characters (or sub-strings) to replace.</p>
5714
   * @param string|string[] $to   <p>The characters (or sub-strings) to translate to.</p>
5715
   *
5716
   * @return string <p>
5717
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5718
   *                corresponding character in to.
5719
   *                </p>
5720
   */
5721 1
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // identical "from" and "to" means there is nothing to translate
    if ($from === $to) {
      return $str;
    }

    // INF is the "no $to given" marker: in that case $from is used as-is below
    if (INF !== $to) {
      // FIX: only strings are split into characters; arrays are already lists
      // of items and must not be passed to self::str_split()
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // cut the longer list down so that both lists have the same length
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // an empty mapping changes nothing; returning early also avoids
      // array_combine() failing on empty arrays (PHP < 5.4)
      if (count($from) === 0) {
        return $str;
      }

      // build the "search => replacement" map for the native strtr()
      $from = array_combine($from, $to);
    }

    // a plain string without $to: every occurrence of it is removed
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
5754
5755
  /**
   * Get the width of a string via "mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Charset of the string, default is UTF-8. Every other value is
   *                           normalized via "UTF8::normalize_encoding()" first.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The width as reported by "mb_strwidth()".</p>
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // iconv and mbstring are not tolerant to invalid encoding, so clean up first if requested
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // the "mb_"-function may be provided by a polyfill
    return \mb_strwidth($input, $charset);
  }
5779
5780
  /**
   * Change the case of all keys in an array, using the UTF-8 aware
   * "UTF8::strtolower()" / "UTF8::strtoupper()" helpers.
   *
   * @param array $array <p>The array to work on.</p>
   * @param int   $case  [optional] <p>Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default). Every other value is handled
   *                     like <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>A new array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (is_array($array) === false) {
      return false;
    }

    // only an explicit CASE_LOWER lowercases the keys, everything else uppercases them
    $toLower = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $name => $item) {
      $name = $toLower ? self::strtolower($name) : self::strtoupper($name);

      // keys that collide after the conversion overwrite each other, the last one wins
      $result[$name] = $item;
    }

    return $result;
  }
5817
5818
  /**
   * Get part of a string.
   *
   * The first available backend is used, in this order: native "substr()" (CP850 only),
   * "mb_substr()", "grapheme_substr()", "iconv_substr()", native "substr()" for pure
   * ASCII input and finally a split-and-slice fallback in plain PHP.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only needed for the offset check and as default length
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = (int)self::strlen($str, $encoding);
    }

    // Impossible: the offset is behind the end of the string
    if ($offset && $offset > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is sliced with the native function, as long as mbstring does not overload it
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // pass the charset explicitly, otherwise iconv falls back to its ini-configured
      // internal encoding, which is not necessarily the encoding of $str
      return \iconv_substr($str, $offset, $length, $encoding);
    }

    // pure ASCII: bytes and characters are the same, the native function is safe
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $offset, $length));
  }
5940
5941
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, str1 is cut via "UTF8::substr()" first and str2 is
   * cut down to the resulting length of str1. The comparison itself is delegated to
   * "UTF8::strcasecmp()" or "UTF8::strcmp()".
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $needsSlicing = ($offset !== 0 || $length !== null);

    if ($needsSlicing) {
      // "UTF8::substr()" may return false, the string-cast turns that into an empty string
      $str1 = (string)self::substr($str1, $offset, $length);
      $str2 = (string)self::substr($str2, 0, self::strlen($str1));
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5985
5986
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If only an offset is given, the length of the haystack is used.
   *                           </p>
   * @param string  $encoding  <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty
   *                   (and, before PHP 7.1, for a non-zero offset and length with "offset + length <= 0").</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // restrict the haystack to the requested window first
    $hasWindow = ($offset || $length !== null);
    if ($hasWindow) {
      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      if ($length !== 0 && $offset !== 0 && $length + $offset <= 0) {
        // output from "substr_count()" have changed in PHP 7.1
        if (Bootup::is_php('7.1') === false) {
          return false;
        }
      }

      // "UTF8::substr()" may return false, the string-cast turns that into an empty string
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would otherwise disturb the multi-byte functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the matches of the quoted needle
    $pattern = '/' . preg_quote($needle, '/') . '/us';
    preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6075
6076
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it
   *                does not start with the needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "UTF8::substr()" may return false, the string-cast turns that into an empty string
    return (string)self::substr($haystack, self::strlen($needle));
  }
6108
6109
  /**
   * Remove a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it
   *                does not end with the needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keep = self::strlen($haystack) - self::strlen($needle);

    // "UTF8::substr()" may return false, the string-cast turns that into an empty string
    return (string)self::substr($haystack, 0, $keep);
  }
6141
6142
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it
   *                does not start with the needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "UTF8::substr()" may return false, the string-cast turns that into an empty string
    return (string)self::substr($haystack, self::strlen($needle));
  }
6174
6175
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings. If an array is
   *                                          given for a single input string, only its first element is used.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, the replacing
   *                                          ends at the end of string. If it is not given for an array of strings,
   *                                          a length of zero is used for every element, which has the effect of
   *                                          inserting replacement at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement: one entry per input string
      $replacement = (is_array($replacement) === true)
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      // the offset: non-integer entries are replaced by 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as $offsetKey => $singleOffset) {
          if ((int)$singleOffset !== $singleOffset) {
            $offset[$offsetKey] = 0;
          }
        }
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length: missing entries become 0, other non-integer entries become $num
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $lengthKey => $singleLength) {
          if ($singleLength === null) {
            $length[$lengthKey] = 0;
          } elseif ((int)$singleLength !== $singleLength) {
            $length[$lengthKey] = $num;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call, once per input string
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string only takes the first replacement
    if (is_array($replacement) === true) {
      $replacement = (count($replacement) > 0) ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // pure ASCII: the native function is safe
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters ...
    preg_match_all('/./us', $str, $strMatches);
    preg_match_all('/./us', $replacement, $replacementMatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    // ... and replace on character level
    array_splice($strMatches[0], $offset, $length, $replacementMatches[0]);

    return implode('', $strMatches[0]);
  }
6276
6277
  /**
   * Remove a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it
   *                does not end with the needle.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keep = self::strlen($haystack) - self::strlen($needle);

    // "UTF8::substr()" may return false, the string-cast turns that into an empty string
    return (string)self::substr($haystack, 0, $keep);
  }
6308
6309
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is uppercased; characters that are not changed by
   * that are lowercased instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would otherwise disturb the regex below
      $str = self::clean($str);
    }

    // swaps the case of one matched character
    $swapper = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // already uppercase (or caseless): try the lowercase version
      return ($char === $upper) ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapper, $str);
  }
6352
6353
  /**
   * Deprecated alias for "UTF8::to_ascii()", all arguments are passed through unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Passed as "$unknown" to "UTF8::to_ascii()".</p>
   * @param bool   $strict    [optional] <p>Passed as "$strict" to "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6370
6371
  /**
   * Deprecated alias for "UTF8::to_iso8859()", the argument is passed through unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6386
6387
  /**
   * Deprecated alias for "UTF8::to_latin1()", the argument is passed through unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6402
6403
  /**
   * Deprecated alias for "UTF8::to_utf8()", the argument is passed through unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6418
6419
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned as it is. Everything else is cleaned via "UTF8::clean()"
   * and then transliterated character by character with lookup tables loaded via
   * "UTF8::getData()"; characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lazily filled cache of the lookup tables, one table per "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $strTmp = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure, keep the input
        // in that case and let the lookup tables below do the work
        if ($strTmp !== false) {
          $str = $strTmp;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // reset per character, otherwise the "isset($ord)" check below would
      // see the code point of the previous character
      $ord = null;

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = self::$ORD[$c[1]];

      // decode the code point from the bytes, depending on the lead byte

      // 2 bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3 bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4 bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5 bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6 bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no branch above produced a code point for this lead byte
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points, loaded on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the character within its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
6583
6584
  /**
   * Convert a UTF-8 string (or every element of an array) to "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively) with their keys preserved.
   * Any other value is cast to string; an empty string yields ''.
   *
   * @param string|string[] $str <p>The input string or an array of input strings.</p>
   *
   * @return string|string[] <p>The result of "UTF8::utf8_decode()" for each string.</p>
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) !== true) {
      $input = (string)$str;

      // nothing to decode for an empty string
      return $input === '' ? '' : self::utf8_decode($input);
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
6613
6614
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of input strings.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns for the same input.</p>
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
6627
6628
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Bytes that already form a well-shaped 2-, 3- or 4-byte UTF-8 sequence are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "UTF8::to_utf8_convert()", i.e. the original string is
   * assumed to be either WINDOWS-1252 or ISO-8859.</li>
   * <li>Unicode escape sequences of the form "\uXXXX" are decoded afterwards.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "mbstring.func_overload" replaced strlen()
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes would a lead byte like this one announce?
        if ($lead <= "\xDF") {
          $extra = 1; // looks like 2 bytes UTF8
        } elseif ($lead <= "\xEF") {
          $extra = 2; // looks like 3 bytes UTF8
        } elseif ($lead <= "\xF7") {
          $extra = 3; // looks like 4 bytes UTF8
        } else {
          $extra = 0; // doesn't look like UTF8, but should be converted
        }

        $sequence = $lead;
        $looksLikeUtf8 = $extra > 0;

        for ($k = 1; $looksLikeUtf8 && $k <= $extra; ++$k) {
          // a missing byte (end of string) is treated as "\x00", which is never a continuation byte
          $next = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

          if ($next >= "\x80" && $next <= "\xBF") {
            $sequence .= $next;
          } else {
            $looksLikeUtf8 = false;
          }
        }

        if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
          $result .= $sequence;
          $pos += $extra;
        } else { // not valid UTF8 - convert only the lead byte, the following bytes are re-examined
          $result .= self::to_utf8_convert($lead);
        }

      } elseif (($lead & "\xC0") === "\x80") { // stray continuation byte (0x80-0xBF) - needs conversion

        $result .= self::to_utf8_convert($lead);

      } else { // 7-bit byte - it doesn't need conversion
        $result .= $lead;
      }

      ++$pos;
    }

    // decode unicode escape sequences ("\uXXXX")
    $result = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // optionally decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
6745
6746
  /**
   * Convert one non-UTF-8 byte into its UTF-8 representation.
   *
   * The byte is first looked up (by its ordinal) in the "win1252_to_utf8" data table; if there is no entry,
   * it is encoded as a two-byte UTF-8 sequence (110000xx 10xxxxxx).
   *
   * @param string $int <p>A single byte (one-character string); despite its name it is not an integer.</p>
   *
   * @return string <p>The UTF-8 byte sequence for the given byte.</p>
   */
  private static function to_utf8_convert($int)
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    // NOTE(review): assumes self::$ORD maps a byte to its integer ordinal (0-255) - confirm against the data file
    $ordC1 = self::$ORD[$int];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Use an integer shift instead of "$ordC1 / 64": the division yields a float for most values and a float
    // array key is only implicitly truncated (deprecated since PHP 8.1). The shift gives the same index.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6779
6780
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function if the string / chars are pure 7-bit,
   * but checking for ASCII (7-bit) costs more time than we would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INF is the "not given" marker; any falsy value (including the string '0') selects the default set as well
    $useDefaultCharacters = $chars === INF || !$chars;

    if ($useDefaultCharacters === false) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // default: strip unicode separators (\pZ) and "other" characters like controls (\pC)
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6808
6809
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character (substr() may report "false" if there is nothing left)
    $tail = self::substr($str, 1, null, $encoding);

    // the first character, uppercased
    $head = (string)self::substr($str, 0, 1, $encoding);
    $upperHead = self::strtoupper($head, $encoding, $cleanUtf8);

    return $upperHead . ($tail === false ? '' : $tail);
  }
6839
6840
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Whatever "UTF8::ucfirst()" returns for the same arguments.</p>
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
6855
6856
  /**
   * Uppercase the first character of every word in the string.
   *
   * If neither a charlist nor exceptions are given and the string is pure ASCII, the native "ucwords()" is used.
   * Otherwise the string is split with "UTF8::str_to_words()" and every word that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()".
   *
   * The string "0" is handled like any other non-empty value (input, word and charlist).
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional characters that count as part of a word and do not
   *                             start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // explicit empty-check: a plain "!$str" would also discard the valid input "0"
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // compare against '' instead of casting to bool, so that a charlist of "0" is not ignored
    $usePhpDefaultFunctions = ((string)$charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty chunks only - the word "0" must be kept
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
6924
6925
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html-entities, so the entity-decoder below can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced if "Bootup::is_php('5.4')" reports true
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // one pass = to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8;
    // with $multi_decode the pass is repeated until the string stops changing
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6975
6976
  /**
   * Return an array that maps "urlencoded" WINDOWS-1252 bytes ('%20' ... '%FF') to their UTF-8 characters.
   *
   * Non-printable characters are written as escape sequences so they cannot get lost in editors or transfers:
   * '%7F' (DEL), '%A0' (no-break space) and '%AD' (soft hyphen). '%80' maps to the euro sign, as defined by
   * WINDOWS-1252. The code points that WINDOWS-1252 leaves undefined ('%81', '%8D', '%8F', '%90', '%9D')
   * map to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Key: the urlencoded byte (e.g. '%FC'), value: the UTF-8 character (e.g. 'ü').</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DEL (control character)
        '%80' => '€', // WINDOWS-1252 0x80 is the euro sign (the backtick is already covered by '%60')
        '%81' => '', // undefined in WINDOWS-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // undefined in WINDOWS-1252
        '%8E' => 'Ž',
        '%8F' => '', // undefined in WINDOWS-1252
        '%90' => '', // undefined in WINDOWS-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // undefined in WINDOWS-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // no-break space (U+00A0)
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // soft hyphen (U+00AD)
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7212
7213
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is rewritten in place: 2-byte sequences become one byte if the code point is below 256,
   * everything else that cannot be represented (3- and 4-byte sequences, larger code points, a truncated
   * 2-byte sequence at the end of the string) becomes "?".
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>If true and the decoded result is not shorter (per "UTF8::strlen()") than
   *                              the string before the decoding loop, that earlier string is returned
   *                              instead.</p>
   *
   * @return string
   */
  public static function utf8_decode($str, $keepUtf8Chars = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): this replaces the *keys* of the "win1252_to_utf8" table by its *values*, while
    // "to_utf8_convert()" indexes the same table by byte ordinal - confirm that the direction and the
    // key type are really what is wanted here.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;

    // length in bytes, even if "mbstring.func_overload" replaced strlen()
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = \strlen($str);
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i = read position, $j = write position ($j never overtakes $i, so writing in place is safe)
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          if ($i + 1 >= $len) {
            // truncated sequence at the end of the string: do not read behind the last byte
            $str[$j] = $noCharFound;
            break;
          }

          $highBits = self::$ORD[$str[$i] & "\x1F"] << 6;
          ++$i;
          $lowBits = self::$ORD[$str[$i] & "\x3F"];
          $c = $highBits | $lowBits;

          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // 4-byte sequence: skip one extra byte, then ...
          ++$i;
          // ... intentional fall-through to the 3-byte handling
        case "\xE0":
          // not representable in ISO-8859-1
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          // single byte: copy as it is
          $str[$j] = $str[$i];
      }
    }

    // cut off the now unused rest of the in-place buffer
    $return = (string)self::substr($str, 0, $j, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7302
7303
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The string is passed through "\utf8_encode()"; if the result contains the byte 0xC2, the keys of the
   * "win1252_to_utf8" data table are replaced by its values afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or '' for empty input or if "\utf8_encode()" reports false.</p>
   */
  public static function utf8_encode($str)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a 0xC2 byte the table-replacement is skipped
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the search / replace lists are built once per request
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $searchCache = \array_keys(self::$WIN1252_TO_UTF8);
      $replaceCache = \array_values(self::$WIN1252_TO_UTF8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7346
7347
  /**
   * Alias of "UTF8::fix_simple_utf8()" (fix -> utf8-win1252 chars).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the same input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7360
7361
  /**
   * Returns the class-internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. (e-mail address not available in this copy)
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7377
7378
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer; values below 1 yield ''.</p>
   * @param string $strAddOn <p>Appended to the result if the string had to be shortened.</p>
   *
   * @return string <p>The (maybe shortened) string.</p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    $limit = (int)$limit;

    if ($limit < 1) {
      return '';
    }

    // leading whitespace + up to $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // only shorten if there was a match and the match does not already cover the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7414
7415
  /**
   * Wraps a string to a given number of characters
   *
   * How it works: every character of the input is mirrored into a one-byte-per-character "skeleton"
   * (' ' for a space, '?' for any other character, '#' for an existing break). The native "wordwrap()" runs
   * on that skeleton and its '#' markers are then mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // step 1: build the character list and the skeleton
    $chars = array();
    $skeleton = '';

    foreach (explode($break, $str) as $index => $segment) {

      if ($index > 0) {
        // an already existing break between two segments
        $chars[] = $break;
        $skeleton .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $skeleton .= $char === ' ' ? ' ' : '?';
      }
    }

    // step 2: let the native function decide where to break
    $wrapped = wordwrap($skeleton, $width, '#', $cut);

    // step 3: map every '#' marker back onto the real characters
    $output = '';
    $charIndex = 0;
    $cursor = -1;
    $markerPos = -1;

    while (true) {
      $markerPos = self::strpos($wrapped, '#', $markerPos + 1);
      if ($markerPos === false) {
        break;
      }

      // copy (and consume) all characters in front of the marker
      for (++$cursor; $cursor < $markerPos; ++$cursor) {
        $output .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the marker replaced an existing break or a space: consume that character, too
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $output .= $break;
    }

    // whatever is left belongs to the last line
    return $output . implode('', $chars);
  }
7482
7483
  /**
   * Returns the class-internal array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7492
7493
}
7494