Completed
Push — master ( f678bd...162de9 )
by Lars
05:11
created

UTF8::strlen_in_byte()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2.1481

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 4
cts 6
cp 0.6667
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 1
crap 2.1481
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
   * Byte-order-mark => byte length of that mark.
   *
   * Every BOM is listed twice: as its raw bytes, and as the UTF-8 byte sequence
   * you get when those raw bytes were wrongly read as "WINDOWS-1252" and then
   * stored as UTF-8 (each byte >= 0x80 becomes a two-byte sequence).
   *
   * All keys are written as escape sequences on purpose: invisible bytes
   * (NUL, the BOM itself) typed literally into the source are easily lost by
   * editors and tools, which would silently turn a key into '' or into spaces.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
35
36
  /**
37
   * Numeric code point => UTF-8 Character
38
   *
39
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
40
   *
41
   * @var array
42
   */
43
  private static $WHITESPACE = array(
44
    // NUL Byte
45
    0     => "\x0",
46
    // Tab
47
    9     => "\x9",
48
    // New Line
49
    10    => "\xa",
50
    // Vertical Tab
51
    11    => "\xb",
52
    // Carriage Return
53
    13    => "\xd",
54
    // Ordinary Space
55
    32    => "\x20",
56
    // NO-BREAK SPACE
57
    160   => "\xc2\xa0",
58
    // OGHAM SPACE MARK
59
    5760  => "\xe1\x9a\x80",
60
    // MONGOLIAN VOWEL SEPARATOR
61
    6158  => "\xe1\xa0\x8e",
62
    // EN QUAD
63
    8192  => "\xe2\x80\x80",
64
    // EM QUAD
65
    8193  => "\xe2\x80\x81",
66
    // EN SPACE
67
    8194  => "\xe2\x80\x82",
68
    // EM SPACE
69
    8195  => "\xe2\x80\x83",
70
    // THREE-PER-EM SPACE
71
    8196  => "\xe2\x80\x84",
72
    // FOUR-PER-EM SPACE
73
    8197  => "\xe2\x80\x85",
74
    // SIX-PER-EM SPACE
75
    8198  => "\xe2\x80\x86",
76
    // FIGURE SPACE
77
    8199  => "\xe2\x80\x87",
78
    // PUNCTUATION SPACE
79
    8200  => "\xe2\x80\x88",
80
    // THIN SPACE
81
    8201  => "\xe2\x80\x89",
82
    //HAIR SPACE
83
    8202  => "\xe2\x80\x8a",
84
    // LINE SEPARATOR
85
    8232  => "\xe2\x80\xa8",
86
    // PARAGRAPH SEPARATOR
87
    8233  => "\xe2\x80\xa9",
88
    // NARROW NO-BREAK SPACE
89
    8239  => "\xe2\x80\xaf",
90
    // MEDIUM MATHEMATICAL SPACE
91
    8287  => "\xe2\x81\x9f",
92
    // IDEOGRAPHIC SPACE
93
    12288 => "\xe3\x80\x80",
94
  );
95
96
  /**
97
   * @var array
98
   */
99
  private static $WHITESPACE_TABLE = array(
100
      'SPACE'                     => "\x20",
101
      'NO-BREAK SPACE'            => "\xc2\xa0",
102
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
103
      'EN QUAD'                   => "\xe2\x80\x80",
104
      'EM QUAD'                   => "\xe2\x80\x81",
105
      'EN SPACE'                  => "\xe2\x80\x82",
106
      'EM SPACE'                  => "\xe2\x80\x83",
107
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
108
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
109
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
110
      'FIGURE SPACE'              => "\xe2\x80\x87",
111
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
112
      'THIN SPACE'                => "\xe2\x80\x89",
113
      'HAIR SPACE'                => "\xe2\x80\x8a",
114
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
115
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
116
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
117
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
118
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
119
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
120
  );
121
122
  /**
123
   * bidirectional text chars
124
   *
125
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
126
   *
127
   * @var array
128
   */
129
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
130
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
131
    8234 => "\xE2\x80\xAA",
132
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
133
    8235 => "\xE2\x80\xAB",
134
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
135
    8236 => "\xE2\x80\xAC",
136
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
137
    8237 => "\xE2\x80\xAD",
138
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
139
    8238 => "\xE2\x80\xAE",
140
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
141
    8294 => "\xE2\x81\xA6",
142
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
143
    8295 => "\xE2\x81\xA7",
144
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
145
    8296 => "\xE2\x81\xA8",
146
    // POP DIRECTIONAL ISOLATE
147
    8297 => "\xE2\x81\xA9",
148
  );
149
150
  /**
151
   * @var array
152
   */
153
  private static $COMMON_CASE_FOLD = array(
154
      'ſ'            => 's',
155
      "\xCD\x85"     => 'ι',
156
      'ς'            => 'σ',
157
      "\xCF\x90"     => 'β',
158
      "\xCF\x91"     => 'θ',
159
      "\xCF\x95"     => 'φ',
160
      "\xCF\x96"     => 'π',
161
      "\xCF\xB0"     => 'κ',
162
      "\xCF\xB1"     => 'ρ',
163
      "\xCF\xB5"     => 'ε',
164
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
165
      "\xE1\xBE\xBE" => 'ι',
166
  );
167
168
  /**
169
   * @var array
170
   */
171
  private static $SUPPORT = array();
172
173
  /**
174
   * @var null|array
175
   */
176
  private static $UTF8_MSWORD = null;
177
178
  /**
179
   * @var null|array
180
   */
181
  private static $BROKEN_UTF8_FIX = null;
182
183
  /**
184
   * @var null|array
185
   */
186
  private static $WIN1252_TO_UTF8 = null;
187
188
  /**
189
   * @var null|array
190
   */
191
  private static $ENCODINGS = null;
192
193
  /**
194
   * @var null|array
195
   */
196
  private static $ORD = null;
197
198
  /**
199
   * @var null|array
200
   */
201
  private static $CHR = null;
202
203
  /**
   * Instantiating the class only makes sure the environment detection has run.
   *
   * @see UTF8::checkForSupport()
   */
  public function __construct()
  {
    self::checkForSupport();
  }
210
211
  /**
   * Fetch the single character located at a given position ($str[1]-like access for UTF-8).
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based character position; negative positions are rejected.</p>
   *
   * @return string <p>The (possibly multi-byte) character, or an empty string for an empty input
   *                or a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $pos = (int)$pos;

    return $pos < 0 ? '' : (string)self::substr($str, $pos, 1);
  }
235
236
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input string, prefixed with the UTF-8 BOM when it had none.</p>
   */
  public static function add_bom_to_string($str)
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
253
254
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zero bits carry no value and are
   * dropped, the remaining bits are left-padded to whole bytes. The conversion works byte by
   * byte, so it is exact for inputs of any length and never shifts nibbles (a previous
   * implementation based on base_convert() + pack('H*') turned "1010" into "\xa0" instead
   * of "\n" and lost precision on long inputs).
   *
   * @param mixed $bin <p>String consisting of the characters 1|0; other characters are ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if the input is empty or all zero.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // keep only real bit characters (the numeric conversion used before ignored anything else, too)
    $bits = ltrim((string)preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 so that every chunk below is exactly one byte
    $remainder = strlen($bits) % 8;
    if ($remainder !== 0) {
      $bits = str_repeat('0', 8 - $remainder) . $bits;
    }

    $str = '';
    foreach (str_split($bits, 8) as $octet) {
      $str .= \chr((int)bindec($octet));
    }

    return $str;
  }
274
275
  /**
   * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: the private table UTF8::$BOM additionally lists the UTF-16 and UTF-32 marks.
   *
   * @return string <p>UTF-8 Byte Order Mark.</p>
   */
  public static function bom()
  {
    return "\xef\xbb\xbf";
  }
286
287
  /**
   * Alias of UTF8::chr_map(): run a callback on every character of a string.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback to apply.</p>
   * @param string       $str      <p>The string whose characters are passed to the callback.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns for the same arguments.</p>
   */
  public static function callback($callback, $str)
  {
    $result = self::chr_map($callback, $str);

    return $result;
  }
301
302
  /**
   * Detect once which UTF-8 related PHP extensions / features are available and
   * remember the outcome in self::$SUPPORT.
   *
   * INFO: There is no need to call this manually, the methods that depend on the
   *       result trigger it themselves. Repeated calls return immediately.
   */
  public static function checkForSupport()
  {
    // already done -> nothing to detect again
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator ids are only available with "intl"; fall back to an empty list otherwise
    $hasTransliterator = self::$SUPPORT['intl'] === true
                         &&
                         function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $hasTransliterator ? transliterator_list_ids() : array();

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // PCRE with UTF-8: http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
338
339
  /**
   * Build the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * The character is first produced as UTF-8 and - if another encoding was requested -
   * converted afterwards. Results are memoized per "code point + encoding" in a
   * function-static cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // function-static memo: "<code point><encoding>" => character
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // only a warning: the conversion below is attempted nevertheless
    $needsMbstring = $encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252';
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) {

      // use "simple"-char only until "\x80": plain table lookup
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $chr = self::$CHR[$code_point];

    } elseif (self::$SUPPORT['intlChar'] === true) {

      $chr = \IntlChar::chr($code_point);

    } else {

      // manual UTF-8 encoding: one lookup per resulting byte
      if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
      }

      $byte = self::$CHR;

      if ($code_point <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        $chr = $byte[($code_point >> 6) + 0xC0] .
               $byte[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        $chr = $byte[($code_point >> 12) + 0xE0] .
               $byte[(($code_point >> 6) & 0x3F) + 0x80] .
               $byte[($code_point & 0x3F) + 0x80];
      } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $chr = $byte[($code_point >> 18) + 0xF0] .
               $byte[(($code_point >> 12) & 0x3F) + 0x80] .
               $byte[(($code_point >> 6) & 0x3F) + 0x80] .
               $byte[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $chr = \mb_convert_encoding($chr, $encoding, 'UTF-8');
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
428
429
  /**
   * Split a string into its characters and run a callback on each of them.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
443
444
  /**
   * List the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>One byte length per character, in string order; empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return array();
    }

    $sizes = array();
    foreach (self::split($str) as $index => $char) {
      // "8BIT" => count raw bytes instead of characters
      $sizes[$index] = self::strlen($char, '8BIT');
    }

    return $sizes;
  }
471
472
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 - 4); the payload bits of the
   * lead byte and of the following continuation bytes are concatenated. The input is
   * not validated: a lead byte that matches none of the multi-byte patterns is returned
   * as it is, and only the first character of a longer string is looked at.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The code point; 0 for an empty input.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // an empty string has no first byte to read ($char[0] would be an uninitialized offset)
    // NOTE(review): 0 is presumably what UTF8::ord('') yielded here before - confirm
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    $bytes = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // append the payload of every continuation byte (10xxxxxx)
    for ($i = 1; $i < $bytes; $i++) {
      $code = ($code << 6) + (self::ord($char[$i]) & ~0x80);
    }

    return $code;
  }
511
512
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed over to UTF8::int_to_hex(), "U+" by default.</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;
    if ($char === '') {
      return '';
    }

    // the literal entity "&#0;" is looked up as if it was an empty character
    $code_point = self::ord($char === '&#0;' ? '' : $char);

    return self::int_to_hex($code_point, $pfix);
  }
534
535
  /**
   * Alias of UTF8::chr_to_decimal(): decimal code point of a character.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_int($chr)
  {
    $code_point = self::chr_to_decimal($chr);

    return $code_point;
  }
548
549
  /**
   * Cut a string into chunks and glue them together again with a line ending in between.
   *
   * INFO: The line ending is only placed between the chunks, nothing is appended
   *       after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
562
563
  /**
   * Strip everything from a string that is not part of a well-formed UTF-8 byte sequence,
   * plus some optional extra clean-ups.
   *
   * Steps, always in this order:
   * 1. drop bytes that do not belong to a structurally valid 1 - 4 byte sequence
   * 2. drop the replacement character via UTF8::replace_diamond_question_mark()
   * 3. UTF8::remove_invisible_characters()
   * 4. optional: whitespace normalization, MS Word normalization, BOM removal
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    //
    // Group 1 captures runs of valid sequences and is written back via "$1"; the two
    // other alternatives match a single stray byte, for which "$1" is empty.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($pattern, '$1', $str),
            ''
        )
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    return $remove_bom === true ? self::remove_bom($str) : $str;
  }
611
612
  /**
   * Repair simple encoding errors of a string and reduce it to clean UTF-8.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for an empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // step 1: fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // step 2: UTF8::clean() with
    //   - BOM removal                 (2nd argument)
    //   - whitespace normalization    (3rd argument)
    //   - no MS Word normalization    (4th argument)
    //   - non-breaking-spaces kept    (5th argument)
    // which also removes all none UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    return (string)self::clean($fixed, true, true, false, true);
  }
639
640
  /**
   * Translate a string (or an array of strings) into the list of its Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a string is taken apart into characters first, an array is used as it is
    $chars = is_string($arg) === true ? self::split($arg) : $arg;

    $code_points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $code_points;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $code_points);
  }
677
678
  /**
   * Count how often every character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array entry per character
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
691
692
  /**
   * Turn a decimal code point into its UTF-8 character by decoding the
   * numeric entity "&#<int>;".
   *
   * @param mixed $int <p>The decimal code point.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    $flags = Bootup::is_php('5.4') === true ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
709
710
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * How the conversion is picked:
   * - the current encoding is auto-detected; if that fails the input is returned unchanged
   * - without $force, a string that already is in the target encoding is returned unchanged
   * - target "UTF-8" / "ISO-8859-1": UTF8::to_utf8() / UTF8::to_iso8859() is used if $force is set
   *   or the detected encoding is one of UTF-8, WINDOWS-1252, ISO-8859-1
   * - everything else goes through mb_convert_encoding(); if that yields no usable result the
   *   input is returned unchanged
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to convert, or no target given
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // detection failed, or the string already has the wanted encoding and nothing is forced
    if (
        $encodingDetected === false
        ||
        ($force !== true && $encodingDetected === $encoding)
    ) {
      return $str;
    }

    $useOwnConverter = $force === true
                       ||
                       in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true);

    if ($useOwnConverter === true) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    // only a warning: the conversion below is attempted nevertheless
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // explicit check instead of plain truthiness: a converted "0" is a valid result
    if (is_string($strEncoded) === true && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
801
802
  /**
   * Reads entire file into a string and - by default - converts the content to clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>Name of the file to read. It is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.</p>
   * @param int|false     $flags         [optional] <p>Handed over to the native file_get_contents() as its
   *                                     second argument (use_include_path / FILE_USE_INCLUDE_PATH, see the
   *                                     PHP manual for FILE_TEXT and FILE_BINARY); every falsy value is
   *                                     turned into <strong>false</strong>.</p>
   * @param resource|null $context       [optional] <p>A valid context resource created with
   *                                     stream_context_create. If it is null and $timeout is not 0, a
   *                                     context with an "http" timeout of $timeout seconds is created.</p>
   * @param int|null      $offset        [optional] <p>The offset where the reading starts; null means 0.</p>
   * @param int|null      $maxLength     [optional] <p>Maximum length of data read; only used if it is an
   *                                     integer. The default is to read until end of file is reached.</p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // normalize the arguments
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and alters quotes / tags in file names
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    $flags = $flags ?: false;
    $offset = $offset === null ? 0 : $offset;

    // the length is only handed over if it really is an integer
    $data = is_int($maxLength) === true
        ? file_get_contents($filename, $flags, $context, $offset, $maxLength)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // auto-detect the encoding (no forced re-encoding) and tidy up the result
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
927
928
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // A failed read yields false, which is not a string that could carry a BOM.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
939
940
  /**
   * Normalizes strings with \Normalizer; strings that cannot be normalized are
   * passed through UTF8::encode('UTF-8', ...) instead.
   *
   * Arrays and objects are filtered recursively (per element / per property),
   * every other non-string value is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Normalization form handed over to \Normalizer.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    if (is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (is_object($var)) {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!is_string($var)) {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII needs no normalization at all.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      $hasNormalizedForm = true;
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);
      $hasNormalizedForm = isset($normalized[0]);

      if ($hasNormalizedForm) {
        $var = $normalized;
      } else {
        // The normalizer gave no usable result, re-encode the input instead.
        $var = self::encode('UTF-8', $var, true);
      }
    }

    if (
        $var[0] >= "\x80"
        &&
        $hasNormalizedForm
        &&
        isset($leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1003
1004
  /**
1005
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1006
   *
1007
   * Gets a specific external variable by name and optionally filters it
1008
   *
1009
   * @link  http://php.net/manual/en/function.filter-input.php
1010
   *
1011
   * @param int    $type          <p>
1012
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1013
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1014
   *                              <b>INPUT_ENV</b>.
1015
   *                              </p>
1016
   * @param string $variable_name <p>
1017
   *                              Name of a variable to get.
1018
   *                              </p>
1019
   * @param int    $filter        [optional] <p>
1020
   *                              The ID of the filter to apply. The
1021
   *                              manual page lists the available filters.
1022
   *                              </p>
1023
   * @param mixed  $options       [optional] <p>
1024
   *                              Associative array of options or bitwise disjunction of flags. If filter
1025
   *                              accepts options, flags can be provided in "flags" field of array.
1026
   *                              </p>
1027
   *
1028
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1029
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1030
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1031
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1032
   * @since 5.2.0
1033
   */
1034 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a fourth
    // argument, otherwise the native function is called without it.
    $value = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1044
1045
  /**
1046
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1047
   *
1048
   * Gets external variables and optionally filters them
1049
   *
1050
   * @link  http://php.net/manual/en/function.filter-input-array.php
1051
   *
1052
   * @param int   $type       <p>
1053
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1054
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1055
   *                          <b>INPUT_ENV</b>.
1056
   *                          </p>
1057
   * @param mixed $definition [optional] <p>
1058
   *                          An array defining the arguments. A valid key is a string
1059
   *                          containing a variable name and a valid value is either a filter type, or an array
1060
   *                          optionally specifying the filter, flags and options. If the value is an
1061
   *                          array, valid keys are filter which specifies the
1062
   *                          filter type,
1063
   *                          flags which specifies any flags that apply to the
1064
   *                          filter, and options which specifies any options that
1065
   *                          apply to the filter. See the example below for a better understanding.
1066
   *                          </p>
1067
   *                          <p>
1068
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1069
   *                          input array are filtered by this filter.
1070
   *                          </p>
1071
   * @param bool  $add_empty  [optional] <p>
1072
   *                          Add missing keys as <b>NULL</b> to the return value.
1073
   *                          </p>
1074
   *
1075
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1076
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1077
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1078
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1079
   * fails.
1080
   * @since 5.2.0
1081
   */
1082 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With at least two arguments the definition (and $add_empty) is forwarded,
    // otherwise the native function is called with the input type only.
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    return self::filter($values);
  }
1092
1093
  /**
1094
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1095
   *
1096
   * Filters a variable with a specified filter
1097
   *
1098
   * @link  http://php.net/manual/en/function.filter-var.php
1099
   *
1100
   * @param mixed $variable <p>
1101
   *                        Value to filter.
1102
   *                        </p>
1103
   * @param int   $filter   [optional] <p>
1104
   *                        The ID of the filter to apply. The
1105
   *                        manual page lists the available filters.
1106
   *                        </p>
1107
   * @param mixed $options  [optional] <p>
1108
   *                        Associative array of options or bitwise disjunction of flags. If filter
1109
   *                        accepts options, flags can be provided in "flags" field of array. For
1110
   *                        the "callback" filter, callable type should be passed. The
1111
   *                        callback must accept one argument, the value to be filtered, and return
1112
   *                        the value after filtering/sanitizing it.
1113
   *                        </p>
1114
   *                        <p>
1115
   *                        <code>
1116
   *                        // for filters that accept options, use this format
1117
   *                        $options = array(
1118
   *                        'options' => array(
1119
   *                        'default' => 3, // value to return if the filter fails
1120
   *                        // other options here
1121
   *                        'min_range' => 0
1122
   *                        ),
1123
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1124
   *                        );
1125
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1126
   *                        // for filter that only accept flags, you can pass them directly
1127
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1128
   *                        // for filter that only accept flags, you can also pass as an array
1129
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1130
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1131
   *                        // callback validate filter
1132
   *                        function foo($value)
1133
   *                        {
1134
   *                        // Expected format: Surname, GivenNames
1135
   *                        if (strpos($value, ", ") === false) return false;
1136
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1137
   *                        $empty = (empty($surname) || empty($givennames));
1138
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1139
   *                        if ($empty || $notstrings) {
1140
   *                        return false;
1141
   *                        } else {
1142
   *                        return $value;
1143
   *                        }
1144
   *                        }
1145
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1146
   *                        </code>
1147
   *                        </p>
1148
   *
1149
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1150
   * @since 5.2.0
1151
   */
1152 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a third argument.
    $filtered = func_num_args() < 3
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1162
1163
  /**
1164
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1165
   *
1166
   * Gets multiple variables and optionally filters them
1167
   *
1168
   * @link  http://php.net/manual/en/function.filter-var-array.php
1169
   *
1170
   * @param array $data       <p>
1171
   *                          An array with string keys containing the data to filter.
1172
   *                          </p>
1173
   * @param mixed $definition [optional] <p>
1174
   *                          An array defining the arguments. A valid key is a string
1175
   *                          containing a variable name and a valid value is either a
1176
   *                          filter type, or an
1177
   *                          array optionally specifying the filter, flags and options.
1178
   *                          If the value is an array, valid keys are filter
1179
   *                          which specifies the filter type,
1180
   *                          flags which specifies any flags that apply to the
1181
   *                          filter, and options which specifies any options that
1182
   *                          apply to the filter. See the example below for a better understanding.
1183
   *                          </p>
1184
   *                          <p>
1185
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1186
   *                          input array are filtered by this filter.
1187
   *                          </p>
1188
   * @param bool  $add_empty  [optional] <p>
1189
   *                          Add missing keys as <b>NULL</b> to the return value.
1190
   *                          </p>
1191
   *
1192
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1193
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1194
   * the variable is not set.
1195
   * @since 5.2.0
1196
   */
1197 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With at least two arguments the definition (and $add_empty) is forwarded,
    // otherwise the native function is called with the data only.
    if (func_num_args() >= 2) {
      $values = filter_var_array($data, $definition, $add_empty);
    } else {
      $values = filter_var_array($data);
    }

    return self::filter($values);
  }
1207
1208
  /**
   * Check that a string is not longer than the given size,
   * measured with UTF8::strlen().
   *
   * @param string $str      <p>The string to be checked.</p>
   * @param int    $box_size <p>The maximum allowed length.</p>
   *
   * @return bool <p>true if the length is less than or equal to $box_size, false otherwise.</p>
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1220
1221
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacements come from the "utf8_fix" data file; its keys are replaced by its values.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists, built once and kept for later calls.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
      }

      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1258
1259
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str) === true) {

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // Repeat the decode / encode round trip until the value stops changing.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $decoded = self::utf8_decode($previous, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1291
1292
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is flagged, its direction code is used first; codes that are
   * in neither list below fall through to the built-in code point table.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    // Code points that count as RTL on their own (compared strictly).
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive [first, last] code point ranges that count as RTL.
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes from "IntlChar"-Class
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the overall span of the table is LTR.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1413
1414
  /**
   * Get data from "/data/*.php" (relative to the directory of this class).
   *
   * @param string $file <p>Name of the data file, without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file,
   *                               false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1431
1432
  /**
   * Check for php-support.
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return the stored value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo($key = null)
  {
    // Run the support detection once, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
1457
1458
  /**
   * Alias of "UTF8::string_has_bom()".
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1473
1474
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with hexdec() and then handed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1485
1486
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (case-insensitive).
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hexadecimal digits are accepted: with a "[a-z0-9]" class an input
    // like "zzzz" would match and be silently converted to 0 by intval().
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1509
1510
  /**
   * Alias of "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1525
1526
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars only code points from 0x80 upwards are converted.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A conversion map is a flat list of 4-tuples (start, end, offset, mask);
      // a map whose length is not a multiple of four is invalid and is rejected
      // by newer PHP versions.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without "mbstring": encode char by char.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1575
1576
  /**
1577
   * UTF-8 version of html_entity_decode()
1578
   *
1579
   * The reason we are not using html_entity_decode() by itself is because
1580
   * while it is not technically correct to leave out the semicolon
1581
   * at the end of an entity most browsers will still interpret the entity
1582
   * correctly. html_entity_decode() does not convert entities without
1583
   * semicolons, so we are left with our own little solution here. Bummer.
1584
   *
1585
   * Convert all HTML entities to their applicable characters
1586
   *
1587
   * INFO: opposite to UTF8::html_encode()
1588
   *
1589
   * @link http://php.net/manual/en/function.html-entity-decode.php
1590
   *
1591
   * @param string $str      <p>
1592
   *                         The input string.
1593
   *                         </p>
1594
   * @param int    $flags    [optional] <p>
1595
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1596
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1597
   *                         <table>
1598
   *                         Available <i>flags</i> constants
1599
   *                         <tr valign="top">
1600
   *                         <td>Constant Name</td>
1601
   *                         <td>Description</td>
1602
   *                         </tr>
1603
   *                         <tr valign="top">
1604
   *                         <td><b>ENT_COMPAT</b></td>
1605
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1606
   *                         </tr>
1607
   *                         <tr valign="top">
1608
   *                         <td><b>ENT_QUOTES</b></td>
1609
   *                         <td>Will convert both double and single quotes.</td>
1610
   *                         </tr>
1611
   *                         <tr valign="top">
1612
   *                         <td><b>ENT_NOQUOTES</b></td>
1613
   *                         <td>Will leave both double and single quotes unconverted.</td>
1614
   *                         </tr>
1615
   *                         <tr valign="top">
1616
   *                         <td><b>ENT_HTML401</b></td>
1617
   *                         <td>
1618
   *                         Handle code as HTML 4.01.
1619
   *                         </td>
1620
   *                         </tr>
1621
   *                         <tr valign="top">
1622
   *                         <td><b>ENT_XML1</b></td>
1623
   *                         <td>
1624
   *                         Handle code as XML 1.
1625
   *                         </td>
1626
   *                         </tr>
1627
   *                         <tr valign="top">
1628
   *                         <td><b>ENT_XHTML</b></td>
1629
   *                         <td>
1630
   *                         Handle code as XHTML.
1631
   *                         </td>
1632
   *                         </tr>
1633
   *                         <tr valign="top">
1634
   *                         <td><b>ENT_HTML5</b></td>
1635
   *                         <td>
1636
   *                         Handle code as HTML 5.
1637
   *                         </td>
1638
   *                         </tr>
1639
   *                         </table>
1640
   *                         </p>
1641
   * @param string $encoding [optional] <p>Encoding to use.</p>
1642
   *
1643
   * @return string <p>The decoded string.</p>
1644
   */
1645 16
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Anything shorter than four bytes (this includes the empty string) is
    // handed back untouched, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // No ampersand at all: nothing that could be an entity.
    if (strpos($str, '&') === false) {
      return $str;
    }

    // Neither a numeric entity prefix nor any terminating semicolon.
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    $isExoticEncoding = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($isExoticEncoding && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Converts one decimal entity via mbstring, but leaves it encoded when the
    // result would be a bare double or single quote.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // Repeat until a full pass changes nothing, so nested encodings
    // (an entity that decodes to another entity) are unwrapped as well.
    do {
      $beforePass = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (numeric entities lacking their trailing ";" get one appended first)
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($beforePass !== $str);

    return $str;
  }
1720
1721
  /**
1722
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1723
   *
1724
   * @link http://php.net/manual/en/function.htmlentities.php
1725
   *
1726
   * @param string $str           <p>
1727
   *                              The input string.
1728
   *                              </p>
1729
   * @param int    $flags         [optional] <p>
1730
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1731
   *                              invalid code unit sequences and the used document type. The default is
1732
   *                              ENT_COMPAT | ENT_HTML401.
1733
   *                              <table>
1734
   *                              Available <i>flags</i> constants
1735
   *                              <tr valign="top">
1736
   *                              <td>Constant Name</td>
1737
   *                              <td>Description</td>
1738
   *                              </tr>
1739
   *                              <tr valign="top">
1740
   *                              <td><b>ENT_COMPAT</b></td>
1741
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1742
   *                              </tr>
1743
   *                              <tr valign="top">
1744
   *                              <td><b>ENT_QUOTES</b></td>
1745
   *                              <td>Will convert both double and single quotes.</td>
1746
   *                              </tr>
1747
   *                              <tr valign="top">
1748
   *                              <td><b>ENT_NOQUOTES</b></td>
1749
   *                              <td>Will leave both double and single quotes unconverted.</td>
1750
   *                              </tr>
1751
   *                              <tr valign="top">
1752
   *                              <td><b>ENT_IGNORE</b></td>
1753
   *                              <td>
1754
   *                              Silently discard invalid code unit sequences instead of returning
1755
   *                              an empty string. Using this flag is discouraged as it
1756
   *                              may have security implications.
1757
   *                              </td>
1758
   *                              </tr>
1759
   *                              <tr valign="top">
1760
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1761
   *                              <td>
1762
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1763
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1764
   *                              </td>
1765
   *                              </tr>
1766
   *                              <tr valign="top">
1767
   *                              <td><b>ENT_DISALLOWED</b></td>
1768
   *                              <td>
1769
   *                              Replace invalid code points for the given document type with a
1770
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1771
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1772
   *                              instance, to ensure the well-formedness of XML documents with
1773
   *                              embedded external content.
1774
   *                              </td>
1775
   *                              </tr>
1776
   *                              <tr valign="top">
1777
   *                              <td><b>ENT_HTML401</b></td>
1778
   *                              <td>
1779
   *                              Handle code as HTML 4.01.
1780
   *                              </td>
1781
   *                              </tr>
1782
   *                              <tr valign="top">
1783
   *                              <td><b>ENT_XML1</b></td>
1784
   *                              <td>
1785
   *                              Handle code as XML 1.
1786
   *                              </td>
1787
   *                              </tr>
1788
   *                              <tr valign="top">
1789
   *                              <td><b>ENT_XHTML</b></td>
1790
   *                              <td>
1791
   *                              Handle code as XHTML.
1792
   *                              </td>
1793
   *                              </tr>
1794
   *                              <tr valign="top">
1795
   *                              <td><b>ENT_HTML5</b></td>
1796
   *                              <td>
1797
   *                              Handle code as HTML 5.
1798
   *                              </td>
1799
   *                              </tr>
1800
   *                              </table>
1801
   *                              </p>
1802
   * @param string $encoding      [optional] <p>
1803
   *                              Like <b>htmlspecialchars</b>,
1804
   *                              <b>htmlentities</b> takes an optional third argument
1805
   *                              <i>encoding</i> which defines encoding used in
1806
   *                              conversion.
1807
   *                              Although this argument is technically optional, you are highly
1808
   *                              encouraged to specify the correct value for your code.
1809
   *                              </p>
1810
   * @param bool   $double_encode [optional] <p>
1811
   *                              When <i>double_encode</i> is turned off PHP will not
1812
   *                              encode existing html entities. The default is to convert everything.
1813
   *                              </p>
1814
   *
1815
   *
1816
   * @return string the encoded string.
1817
   * </p>
1818
   * <p>
1819
   * If the input <i>string</i> contains an invalid code unit
1820
   * sequence within the given <i>encoding</i> an empty string
1821
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1822
   * <b>ENT_SUBSTITUTE</b> flags are set.
1823
   */
1824 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native htmlentities() leaves backslashes alone, so they are turned
     * into their numeric entity here right after the native pass.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra per-character pass below is only done for UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes that survived
    // the native pass, together with the result of self::html_encode() for it.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $chr = self::access($encoded, $position);
      if (isset($entities[$chr])) {
        continue;
      }

      $needles[$chr] = $chr;
      $entities[$chr] = self::html_encode($chr);
    }

    return str_replace($needles, $entities, $encoded);
  }
1862
1863
  /**
1864
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
1865
   *
1866
   * INFO: Take a look at "UTF8::htmlentities()"
1867
   *
1868
   * @link http://php.net/manual/en/function.htmlspecialchars.php
1869
   *
1870
   * @param string $str           <p>
1871
   *                              The string being converted.
1872
   *                              </p>
1873
   * @param int    $flags         [optional] <p>
1874
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1875
   *                              invalid code unit sequences and the used document type. The default is
1876
   *                              ENT_COMPAT | ENT_HTML401.
1877
   *                              <table>
1878
   *                              Available <i>flags</i> constants
1879
   *                              <tr valign="top">
1880
   *                              <td>Constant Name</td>
1881
   *                              <td>Description</td>
1882
   *                              </tr>
1883
   *                              <tr valign="top">
1884
   *                              <td><b>ENT_COMPAT</b></td>
1885
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1886
   *                              </tr>
1887
   *                              <tr valign="top">
1888
   *                              <td><b>ENT_QUOTES</b></td>
1889
   *                              <td>Will convert both double and single quotes.</td>
1890
   *                              </tr>
1891
   *                              <tr valign="top">
1892
   *                              <td><b>ENT_NOQUOTES</b></td>
1893
   *                              <td>Will leave both double and single quotes unconverted.</td>
1894
   *                              </tr>
1895
   *                              <tr valign="top">
1896
   *                              <td><b>ENT_IGNORE</b></td>
1897
   *                              <td>
1898
   *                              Silently discard invalid code unit sequences instead of returning
1899
   *                              an empty string. Using this flag is discouraged as it
1900
   *                              may have security implications.
1901
   *                              </td>
1902
   *                              </tr>
1903
   *                              <tr valign="top">
1904
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1905
   *                              <td>
1906
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1907
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1908
   *                              </td>
1909
   *                              </tr>
1910
   *                              <tr valign="top">
1911
   *                              <td><b>ENT_DISALLOWED</b></td>
1912
   *                              <td>
1913
   *                              Replace invalid code points for the given document type with a
1914
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1915
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1916
   *                              instance, to ensure the well-formedness of XML documents with
1917
   *                              embedded external content.
1918
   *                              </td>
1919
   *                              </tr>
1920
   *                              <tr valign="top">
1921
   *                              <td><b>ENT_HTML401</b></td>
1922
   *                              <td>
1923
   *                              Handle code as HTML 4.01.
1924
   *                              </td>
1925
   *                              </tr>
1926
   *                              <tr valign="top">
1927
   *                              <td><b>ENT_XML1</b></td>
1928
   *                              <td>
1929
   *                              Handle code as XML 1.
1930
   *                              </td>
1931
   *                              </tr>
1932
   *                              <tr valign="top">
1933
   *                              <td><b>ENT_XHTML</b></td>
1934
   *                              <td>
1935
   *                              Handle code as XHTML.
1936
   *                              </td>
1937
   *                              </tr>
1938
   *                              <tr valign="top">
1939
   *                              <td><b>ENT_HTML5</b></td>
1940
   *                              <td>
1941
   *                              Handle code as HTML 5.
1942
   *                              </td>
1943
   *                              </tr>
1944
   *                              </table>
1945
   *                              </p>
1946
   * @param string $encoding      [optional] <p>
1947
   *                              Defines encoding used in conversion.
1948
   *                              </p>
1949
   *                              <p>
1950
   *                              For the purposes of this function, the encodings
1951
   *                              ISO-8859-1, ISO-8859-15,
1952
   *                              UTF-8, cp866,
1953
   *                              cp1251, cp1252, and
1954
   *                              KOI8-R are effectively equivalent, provided the
1955
   *                              <i>string</i> itself is valid for the encoding, as
1956
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1957
   *                              the same positions in all of these encodings.
1958
   *                              </p>
1959
   * @param bool   $double_encode [optional] <p>
1960
   *                              When <i>double_encode</i> is turned off PHP will not
1961
   *                              encode existing html entities, the default is to convert everything.
1962
   *                              </p>
1963
   *
1964
   * @return string The converted string.
1965
   * </p>
1966
   * <p>
1967
   * If the input <i>string</i> contains an invalid code unit
1968
   * sequence within the given <i>encoding</i> an empty string
1969
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1970
   * <b>ENT_SUBSTITUTE</b> flags are set.
1971
   */
1972 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The default "UTF-8" is passed through as-is; any other label is first
    // run through normalize_encoding() (with "UTF-8" as its second argument).
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
1980
1981
  /**
1982
   * Checks whether iconv is available on the server.
1983
   *
1984
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
1985
   */
1986 1
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // The iconv_*() functions only exist when the extension is loaded, so the
    // encoding setup must be skipped without it; calling them anyway would be
    // a fatal "undefined function" error instead of a plain "false" result.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2001
2002
  /**
2003
   * alias for "UTF8::decimal_to_chr()"
2004
   *
2005
   * @see UTF8::decimal_to_chr()
2006
   *
2007
   * @param mixed $int
2008
   *
2009
   * @return string
2010
   */
2011 2
  public static function int_to_chr($int)
  {
    // Pure alias: the argument is forwarded unchanged.
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2015
2016
  /**
2017
   * Converts Integer to hexadecimal U+xxxx code point representation.
2018
   *
2019
   * INFO: opposite to UTF8::hex_to_int()
2020
   *
2021
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2022
   * @param string $pfix [optional]
2023
   *
2024
   * @return string <p>The code point, or empty string on failure.</p>
2025
   */
2026 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers are accepted; numeric strings and floats fail here.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values stay as-is.
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2038
2039
  /**
2040
   * Checks whether intl-char is available on the server.
2041
   *
2042
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2043
   */
2044 1
  public static function intlChar_loaded()
  {
    // The class lookup is only attempted when Bootup reports PHP >= 7.0.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2052
2053
  /**
2054
   * Checks whether intl is available on the server.
2055
   *
2056
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2057
   */
2058 4
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean; compare instead of a ternary.
    return extension_loaded('intl') === true;
  }
2062
2063
  /**
2064
   * alias for "UTF8::is_ascii()"
2065
   *
2066
   * @see        UTF8::is_ascii()
2067
   *
2068
   * @param string $str
2069
   *
2070
   * @return boolean
2071
   *
2072
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2073
   */
2074
  public static function isAscii($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2078
2079
  /**
2080
   * alias for "UTF8::is_base64()"
2081
   *
2082
   * @see        UTF8::is_base64()
2083
   *
2084
   * @param string $str
2085
   *
2086
   * @return bool
2087
   *
2088
   * @deprecated <p>use "UTF8::is_base64()"</p>
2089
   */
2090
  public static function isBase64($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2094
2095
  /**
2096
   * alias for "UTF8::is_binary()"
2097
   *
2098
   * @see        UTF8::is_binary()
2099
   *
2100
   * @param string $str
2101
   *
2102
   * @return bool
2103
   *
2104
   * @deprecated <p>use "UTF8::is_binary()"</p>
2105
   */
2106
  public static function isBinary($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_binary().
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
2110
2111
  /**
2112
   * alias for "UTF8::is_bom()"
2113
   *
2114
   * @see        UTF8::is_bom()
2115
   *
2116
   * @param string $utf8_chr
2117
   *
2118
   * @return boolean
2119
   *
2120
   * @deprecated <p>use "UTF8::is_bom()"</p>
2121
   */
2122
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias: forwards unchanged to is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2126
2127
  /**
2128
   * alias for "UTF8::is_html()"
2129
   *
2130
   * @see        UTF8::is_html()
2131
   *
2132
   * @param string $str
2133
   *
2134
   * @return boolean
2135
   *
2136
   * @deprecated <p>use "UTF8::is_html()"</p>
2137
   */
2138
  public static function isHtml($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2142
2143
  /**
2144
   * alias for "UTF8::is_json()"
2145
   *
2146
   * @see        UTF8::is_json()
2147
   *
2148
   * @param string $str
2149
   *
2150
   * @return bool
2151
   *
2152
   * @deprecated <p>use "UTF8::is_json()"</p>
2153
   */
2154
  public static function isJson($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
2158
2159
  /**
2160
   * alias for "UTF8::is_utf16()"
2161
   *
2162
   * @see        UTF8::is_utf16()
2163
   *
2164
   * @param string $str
2165
   *
2166
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2167
   *
2168
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2169
   */
2170
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf16().
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
2174
2175
  /**
2176
   * alias for "UTF8::is_utf32()"
2177
   *
2178
   * @see        UTF8::is_utf32()
2179
   *
2180
   * @param string $str
2181
   *
2182
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2183
   *
2184
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2185
   */
2186
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf32().
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
2190
2191
  /**
2192
   * alias for "UTF8::is_utf8()"
2193
   *
2194
   * @see        UTF8::is_utf8()
2195
   *
2196
   * @param string $str
2197
   * @param bool   $strict
2198
   *
2199
   * @return bool
2200
   *
2201
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2202
   */
2203
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias: both arguments are forwarded to is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2207
2208
  /**
2209
   * Checks if a string is 7 bit ASCII.
2210
   *
2211
   * @param string $str <p>The string to check.</p>
2212
   *
2213
   * @return bool <p>
2214
   *              <strong>true</strong> if it is ASCII<br>
2215
   *              <strong>false</strong> otherwise
2216
   *              </p>
2217
   */
2218 55
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // An empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Accepted bytes: TAB, LF, CR, 0x10, 0x13 and the printable range 0x20-0x7E;
    // any other byte makes the string non-ASCII.
    // NOTE(review): "\x10" and "\x13" look like decimal 10/13 written as hex
    // (LF/CR are already listed as \x0A/\x0D) - confirm before changing.
    $hasForeignByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return $hasForeignByte !== 1;
  }
2228
2229
  /**
2230
   * Returns true if the string is base64 encoded, false otherwise.
2231
   *
2232
   * @param string $str <p>The input string.</p>
2233
   *
2234
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2235
   */
2236 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strict mode: false is returned when the input contains characters
    // outside the base64 alphabet.
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare against false explicitly instead of testing truthiness: a payload
    // of "0" is falsy in PHP, which used to reject valid input such as "MA==".
    // Re-encoding must reproduce the input exactly (canonical form with padding).
    return base64_encode($decoded) === $str;
  }
2251
2252
  /**
2253
   * Check if the input is binary (heuristic only; this looks like a hack).
2254
   *
2255
   * @param mixed $input
2256
   *
2257
   * @return bool
2258
   */
2259 16
  public static function is_binary($input)
  {
    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    // A string made up solely of the characters "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. This one test also covers the
    // "more than 30% NUL bytes" case, since that implies at least one NUL byte.
    return strpos($input, "\x00") !== false;
  }
2282
2283
  /**
2284
   * Check if the file is binary.
2285
   *
2286
   * @param string $file
2287
   *
2288
   * @return boolean
2289
   */
2290 1
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes of the file are inspected.
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false (plus a warning), not by
      // throwing, so the handle is checked before it is passed on to fread().
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $block = (string)fread($fp, 512);
      }
    } catch (\Exception $e) {
      // Reached only when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Close the handle on every path, including a failed read.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2302
2303
  /**
2304
   * Checks if the given string is equal to any "Byte Order Mark".
2305
   *
2306
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2307
   *
2308
   * @param string $str <p>The input string.</p>
2309
   *
2310
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2311
   */
2312 1
  public static function is_bom($str)
  {
    // The BOM byte sequences are the keys of self::$BOM; the comparison is
    // strict, so the input must be identical to one of them.
    return in_array($str, array_keys(self::$BOM), true);
  }
2322
2323
  /**
2324
   * Check if the string contains any html-tags <lall>.
2325
   *
2326
   * @param string $str <p>The input string.</p>
2327
   *
2328
   * @return boolean
2329
   */
2330 1
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // One opening or closing tag (optionally with attributes, optionally
    // self-closing) anywhere in the string is enough.
    $found = preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2349
2350
  /**
2351
   * Try to check if "$str" is an json-string.
2352
   *
2353
   * @param string $str <p>The input string.</p>
2354
   *
2355
   * @return bool
2356
   */
2357 1
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only objects and arrays count; a decoded scalar or null is rejected.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // The decoder must not have reported an error either.
    return json_last_error() === JSON_ERROR_NONE;
  }
2381
2382
  /**
2383
   * Check if the string is UTF-16.
2384
   *
2385
   * @param string $str <p>The input string.</p>
2386
   *
2387
   * @return int|false <p>
2388
   *                   <strong>false</strong> if it is not UTF-16,<br>
2389
   *                   <strong>1</strong> for UTF-16LE,<br>
2390
   *                   <strong>2</strong> for UTF-16BE.
2391
   *                   </p>
2392
   */
2393 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One hit counter per byte order, evaluated little-endian first.
    $hits = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the first conversion.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      // Each key of count_chars($roundTripped) that is strictly found by
      // in_array() in count_chars($str) scores one hit.
      // NOTE(review): in_array() searches array values - confirm against
      // count_chars() that this is the intended comparison.
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTripped, true) as $chr => $unusedCount) {
        if (in_array($chr, $inputChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    // A tie (including zero hits on both sides) is inconclusive.
    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
      return false;
    }

    return ($hits['UTF-16LE'] > $hits['UTF-16BE']) ? 1 : 2;
  }
2441
2442
  /**
2443
   * Check if the string is UTF-32.
2444
   *
2445
   * @param string $str
2446
   *
2447
   * @return int|false <p>
2448
   *                   <strong>false</strong> if it is not UTF-32,<br>
2449
   *                   <strong>1</strong> for UTF-32LE,<br>
2450
   *                   <strong>2</strong> for UTF-32BE.
2451
   *                   </p>
2452
   */
2453 3 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One hit counter per byte order, evaluated little-endian first.
    $hits = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the first conversion.
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      // Each key of count_chars($roundTripped) that is strictly found by
      // in_array() in count_chars($str) scores one hit.
      // NOTE(review): in_array() searches array values - confirm against
      // count_chars() that this is the intended comparison.
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTripped, true) as $chr => $unusedCount) {
        if (in_array($chr, $inputChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    // A tie (including zero hits on both sides) is inconclusive.
    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
      return false;
    }

    return ($hits['UTF-32LE'] > $hits['UTF-32BE']) ? 1 : 2;
  }
2501
2502
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * An empty string is considered valid. The check is done byte by byte with a small state
   * machine, so it does not depend on PCRE being compiled with UTF-8 support.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if every byte belongs to a complete, shortest-form sequence
   *              that encodes a code point up to U+10FFFF which is not a surrogate,
   *              <strong>false</strong> otherwise.
   *              </p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // INFO: there is deliberately no PCRE shortcut here. The former one ran a "/u" pattern only
    // when "/u" was *not* supported, where the pattern cannot compile and preg_match() never
    // returns 1 -- so every non-empty string was reported as invalid on such systems.

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets announced by the lead octet of the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];

      if ($mState === 0) {
        // We expect either a US-ASCII character or the lead octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence: always illegal, but we carry on until the end of
          // the sequence and let the "4 < $mBytes" check below reject it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see the comment for the 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // We are inside a multi-octet sequence, so only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the multi-octet sequence, $mUcs4 now contains the final code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset the state for the next character
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet sequence
    // (e.g. a single "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
2652
2653
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string, after passing it through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded; it has to be UTF-8 encoded.
   *                        </p>
   *                        <p>PHP implements a superset of JSON - it will also decode scalar
   *                        types and <b>NULL</b>, which the JSON standard only supports when they
   *                        are nested inside an array or an object.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>.
   *                        It is only passed on when "Bootup::is_php('5.4')" is true.
   *                        </p>
   *
   * @return mixed <p>
   *               The value encoded in <i>json</i> in the appropriate PHP type, <b>NULL</b> is
   *               returned if the <i>json</i> cannot be decoded or if the encoded data is deeper
   *               than the recursion limit.
   *               </p>
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = (string)self::filter($json);

    // older PHP versions do not know the 4th argument, so it is left out there
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
2701
2702
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value, after passing it through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource;
   *                       all string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       It is only passed on when "Bootup::is_php('5.5')" is true.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // older PHP versions do not know the 3rd argument, so it is left out there
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
2750
2751
  /**
   * Makes string's first char lowercase.
   *
   * @param string  $str       <p>The input string</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The lower-cased first character followed by the untouched rest.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character; "false" from substr() is treated as "nothing left"
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharLower = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $firstCharLower . ($rest === false ? '' : $rest);
  }
2775
2776
  /**
   * alias for "UTF8::lcfirst()"
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Whatever "UTF8::lcfirst()" returns for the same arguments.</p>
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowerFirst = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowerFirst;
  }
2791
2792
  /**
   * Lowercase the first character of all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty part that is not listed in
   * "$exceptions" is passed to "UTF8::lcfirst()" and the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // only a really empty string is "empty": a plain "!$str" would also swallow the input "0"
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts, but keep the word "0" (which is falsy in PHP)
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
2841
2842
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. If omitted (or empty), all leading characters
   *                      of the Unicode categories "Z" (separators) and "C" (other / control) are
   *                      stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is a legitimate character to strip (e.g. leading zeros), so it must not be mistaken
    // for "no char-list given" just because it is falsy in PHP.
    $useDefaultChars = (
        $chars === INF
        ||
        (!$chars && $chars !== '0' && $chars !== 0)
    );

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars === true) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
2865
2866
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point of all characters.</p>
   */
  public static function max($arg)
  {
    // an array of strings is handled like one long string
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;

    // NOTE(review): if codepoints() yields an empty array (e.g. for ''), max() is called
    // without values -- confirm how callers are expected to handle that.
    $highestCodePoint = max(self::codepoints($haystack));

    return self::chr($highestCodePoint);
  }
2881
2882
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>
   *             The biggest value of "UTF8::chr_size_list()" for the string,
   *             0 if that list is empty.
   *             </p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
2899
2900
  /**
   * Checks whether mbstring is available on the server.
   *
   * INFO: as a side effect the internal encoding of mbstring is set to "UTF-8" if it is available.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2915
2916 1
  /**
   * Checks whether the "mbstring.func_overload" ini-setting has the "MB_OVERLOAD_STRING" bit set.
   *
   * @return bool <p>
   *              <strong>true</strong> if the constant is defined and the bit is set,
   *              <strong>false</strong> otherwise.
   *              </p>
   */
  private static function mbstring_overloaded()
  {
    // without the constant there is nothing that could be overloaded
    if (!defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    return (bool)(ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
2928
2929
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point of all characters.</p>
   */
  public static function min($arg)
  {
    // an array of strings is handled like one long string
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;

    // NOTE(review): if codepoints() yields an empty array (e.g. for ''), min() is called
    // without values -- confirm how callers are expected to handle that.
    $lowestCodePoint = min(self::codepoints($haystack));

    return self::chr($lowestCodePoint);
  }
2944
2945
  /**
   * alias for "UTF8::normalize_encoding()"
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding-"name" to normalize.</p>
   * @param mixed  $fallback <p>Passed on unchanged to "UTF8::normalize_encoding()".</p>
   *
   * @return mixed <p>Whatever "UTF8::normalize_encoding()" returns for the same arguments.</p>
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
2961
2962
  /**
   * Normalize the encoding-"name" input.
   *
   * Names that are listed in the "encodings" data are returned unchanged. Everything else is
   * upper-cased, looked up (without punctuation) in a table of well-known aliases and cached per
   * original input; unknown names are returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8, returned as it is if "$encoding" is empty</p>
   *
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc., or "$fallback" for an empty input.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $normalizedCache = array();

    if (!$encoding) {
      return $fallback;
    }

    // fast path for the most common input
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    if (in_array($encoding, self::$ENCODINGS, true)) {
      return $encoding;
    }

    if (isset($normalizedCache[$encoding])) {
      return $normalizedCache[$encoding];
    }

    $normalized = strtoupper($encoding);

    // alias lookup key: the upper-cased name reduced to letters, digits and whitespace
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    $aliases = array(
        // ISO-8859-1: Western European
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        // ISO-8859-2: Central European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2',
        // ISO-8859-3: Southern European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3',
        // ISO-8859-4: Northern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'    => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'    => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'    => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'    => 'ISO-8859-8',
        // ISO-8859-9: Turkish
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9',
        // ISO-8859-11: Thai
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11',
        // ISO-8859-10: Nordic
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10',
        // ISO-8859-13: Baltic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13',
        // ISO-8859-14: Celtic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14',
        // ISO-8859-15: Western European (with some extra chars e.g. €)
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15',
        // ISO-8859-16: Southeast European
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16',
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // "raw bytes"
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (isset($aliases[$aliasKey])) {
      $normalized = $aliases[$aliasKey];
    }

    // remember the result for the name exactly as it was passed in
    $normalizedCache[$encoding] = $normalized;

    return $normalized;
  }
3072
3073
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data that occurs in the string is replaced by its value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists, built once per request from the "utf8_msword" data
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($search === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      $search = array_keys(self::$UTF8_MSWORD);
      $replace = array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($search, $replace, $str);
  }
3104
3105
  /**
   * Normalize the whitespace.
   *
   * Every entry of the whitespace-table is replaced by a normal space (" ") and, unless they
   * should be kept, every entry of the bidi-control-table is removed beforehand.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // one whitespace list per "$keepNonBreakingSpace" variant, built on first use
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3150
3151
  /**
   * Strip all whitespace characters, i.e. everything that matches the
   * "[[:space:]]" class of a PCRE pattern in UTF-8 mode.
   *
   * @param string $str
   *
   * @return string <p>The string without whitespace, an empty string for an empty input.</p>
   */
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    return isset($input[0]) ? (string)preg_replace('/[[:space:]]+/u', '', $input) : '';
  }
3170
3171
  /**
   * Format a number with grouped thousands.
   *
   * INFO: before PHP 5.4 the native "number_format()" only used the first byte of each separator,
   *       so on those versions multi-byte separators are inserted afterwards.
   *
   * @param float  $number
   * @param int    $decimals      [optional] <p>The number of decimal digits.</p>
   * @param string $dec_point     [optional] <p>The separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>The thousands separator.</p>
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    if (
        // the workaround is needed as soon as *one* of the separators has more than one byte ...
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        // ... but only where the native function can't handle that by itself
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() replaces both placeholders in one pass, so a "," inside of "$dec_point"
      // is not replaced again by "$thousands_sep"
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3209
3210
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // results are cached per original input + normalized encoding
    static $CHAR_CACHE = array();

    $encoding = (string)$encoding;
    $utf8Chr = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $utf8Chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($utf8Chr);
      // an empty result falls through to the manual decoding below
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // the first (up to) four bytes as integers, indexed from 1
    $bytes = unpack('C*', (string)self::substr($utf8Chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4 byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3 byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2 byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode at all)
      $code = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $code;
  }
3276
3277
  /**
3278
   * Parses the string into an array (into the second parameter).
3279
   *
3280
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3281
   *          if the second parameter is not set!
3282
   *
3283
   * @link http://php.net/manual/en/function.parse-str.php
3284
   *
3285
   * @param string  $str       <p>The input string.</p>
3286
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3287
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3288
   *
3289
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3290
   */
3291 1
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success needs both: the parser did not fail and it produced at least one value
    return $parsed !== false && !empty($result);
  }
3305
3306
  /**
3307
   * Checks if \u modifier is available that enables Unicode support in PCRE.
3308
   *
3309
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3310
   */
3311 58
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier only matches when PCRE accepts that modifier;
    // the warning of a failing compile is silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3316
3317
  /**
3318
   * Create an array containing a range of UTF-8 characters.
3319
   *
3320
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3321
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3322
   *
3323
   * @return array
3324
   */
3325 1
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve both bounds with the same rules: decimal digits, then hexadecimal digits,
    // otherwise the value is treated as a UTF-8 character
    $codePoints = array();
    foreach (array($var1, $var2) as $bound) {
      // cast before the ctype_* checks: non-string arguments are deprecated since PHP 8.1
      // and small integers would be interpreted as ASCII values
      $boundAsString = (string)$bound;

      if (ctype_digit($boundAsString)) {
        $codePoint = (int)$bound;
      } elseif (ctype_xdigit($boundAsString)) {
        $codePoint = (int)self::hex_to_int($bound);
      } else {
        $codePoint = self::ord($bound);
      }

      // an unresolvable bound (code point 0) yields an empty range
      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($codePoints[0], $codePoints[1])
    );
  }
3363
3364
  /**
3365
   * Multi decode html entity & fix urlencoded-win1252-chars.
3366
   *
3367
   * e.g:
3368
   * 'test+test'                     => 'test+test'
3369
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3370
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3371
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3372
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3373
   * 'Düsseldorf'                   => 'Düsseldorf'
3374
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3375
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3376
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3377
   *
3378
   * @param string $str          <p>The input string.</p>
3379
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3380
   *
3381
   * @return string
3382
   */
3383 2 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // non-standard "%uXXXX" escapes are rewritten as hexadecimal HTML entities,
    // so that the entity decoding below can resolve them
    $unicodeEscapeRx = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapeRx, $str)) {
      $str = preg_replace($unicodeEscapeRx, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode until the string no longer changes (or only once, if multi-decoding is off)
    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return (string)$str;
  }
3414
3415
  /**
3416
   * alias for "UTF8::remove_bom()"
3417
   *
3418
   * @see        UTF8::remove_bom()
3419
   *
3420
   * @param string $str
3421
   *
3422
   * @return string
3423
   *
3424
   * @deprecated <p>use "UTF8::remove_bom()"</p>
3425
   */
3426
  public static function removeBOM($str)
  {
    // deprecated camelCase alias: all work is done by remove_bom()
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3430
3431
  /**
3432
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
3433
   *
3434
   * @param string $str <p>The input string.</p>
3435
   *
3436
   * @return string <p>String without UTF-BOM</p>
3437
   */
3438 40
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // every known BOM is tested in turn against the (possibly already shortened) string
    foreach (self::$BOM as $bom => $bomBytes) {
      // only a BOM at byte offset 0 counts
      if (self::strpos($str, $bom, 0, '8BIT') !== 0) {
        continue;
      }

      // cut the BOM off byte-wise; a failed cut leaves an empty string
      $rest = self::substr($str, $bomBytes, null, '8BIT');
      $str = ($rest === false) ? '' : (string)$rest;
    }

    return $str;
  }
3458
3459
  /**
3460
   * Removes duplicate occurrences of a string in another string.
3461
   *
3462
   * @param string          $str  <p>The base string.</p>
3463
   * @param string|string[] $what <p>String to search for in the base string.</p>
3464
   *
3465
   * @return string <p>The result string with removed duplicates.</p>
3466
   */
3467 1
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    // anything that is neither a string nor an array leaves the input untouched
    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // an empty needle can not have duplicates
        if ((string)$item === '') {
          continue;
        }

        // A callback returns the needle verbatim. Passing it as a plain replacement string
        // would interpret "$1" or "\1" inside the needle as back-references.
        $str = preg_replace_callback(
            '/(?:' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3482
3483
  /**
3484
   * Remove invisible characters from a string.
3485
   *
3486
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
3487
   *
3488
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
3489
   *
3490
   * @param string $str
3491
   * @param bool   $url_encoded
3492
   * @param string $replacement
3493
   *
3494
   * @return string
3495
   */
3496 62
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoded hex digits may be upper or lower case ("%0B" === "%0b"),
      // so both patterns are matched case-insensitively
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore: removing one sequence can
    // expose a new one (e.g. "%%0000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3516
3517
  /**
3518
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
3519
   *
3520
   * @param string $str                <p>The input string</p>
3521
   * @param string $replacementChar    <p>The replacement character.</p>
3522
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
3523
   *
3524
   * @return string
3525
   */
3526 62
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // mb_substitute_character() only accepts a code point or "none" / "long" / "entity",
      // so an arbitrary replacement string can not be handed over directly.
      // Invalid sequences are therefore turned into U+FFFD first and the final
      // str_replace() below swaps them for the requested replacement.
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save); // restore the previous global setting
    }

    return str_replace(
        array(
            "\xEF\xBF\xBD",
            '�',
        ),
        array(
            $replacementChar,
            $replacementChar,
        ),
        $str
    );
  }
3562
3563
  /**
3564
   * Strip whitespace or other characters from end of a UTF-8 string.
3565
   *
3566
   * @param string $str   <p>The string to be trimmed.</p>
3567
   * @param string $chars <p>Optional characters to be stripped.</p>
3568
   *
3569
   * @return string <p>The string with unwanted characters stripped from the right.</p>
3570
   */
3571 23 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // No usable char list => strip separators (\pZ) and "other" characters (\pC).
    // The string "0" is falsy in PHP but is a valid char list, so it must not
    // end up in this default branch.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3586
3587
  /**
3588
   * rxClass
3589
   *
3590
   * @param string $s
3591
   * @param string $class
3592
   *
3593
   * @return string
3594
   */
3595 60
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // slot 0 collects the content of the bracket expression,
    // any further slot is a stand-alone alternative
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      // a hyphen goes to the very front, where it can not form a range
      if ($char === '-') {
        $parts[0] = '-' . $parts[0];
        continue;
      }

      // at most two bytes: quote it, it may be a regex meta character
      if (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      // three or more bytes but still one character: append as it is
      if (self::strlen($char) === 1) {
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $rx = (count($parts) === 1) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $rx;

    return $rx;
  }
3635
3636
  /**
3637
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
3638
   */
3639 1
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // one "name - value" line per support flag, wrapped in a <pre> block
    echo '<pre>';
    foreach (self::$SUPPORT as $name => $state) {
      echo $name, ' - ', print_r($state, true), "\n<br>";
    }
    echo '</pre>';
  }
3651
3652
  /**
3653
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
3654
   *
3655
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
3656
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
3657
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
3658
   *
3659
   * @return string <p>The HTML numbered entity.</p>
3660
   */
3661 1
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched when the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3683
3684
  /**
3685
   * Convert a string to an array of Unicode characters.
3686
   *
3687
   * @param string  $str       <p>The string to split into array.</p>
3688
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
3689
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3690
   *
3691
   * @return string[] <p>An array containing chunks of the string.</p>
3692
   */
3693 39
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // collected characters
    $chars = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "." with the "u" and "s" modifiers matches exactly one character, newlines included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the length the lead byte announces

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $byteCount = self::strlen_in_byte($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx => single byte
          $chars[] = $lead;

        } elseif (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx => two bytes, kept only with a valid continuation byte
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }

        } elseif (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx => three bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx => four bytes
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }

        // bytes that fit none of the cases above are skipped
        $i++;
      }
    }

    // glue the single characters together into chunks of the requested length
    if ($length > 1) {
      $joined = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $joined[] = implode('', $chunk);
      }

      return $joined;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3803
3804
  /**
3805
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
3806
   *
3807
   * @param string $str <p>The input string.</p>
3808
   *
3809
   * @return false|string <p>
3810
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
3811
   *                      otherwise it will return false.
3812
   *                      </p>
3813
   */
3814 12
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run each detector once and compare its result (1 => LE, 2 => BE)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // without a usable encoding list there is nothing left to probe
    if (is_array(self::$ENCODINGS) === false) {
      return false;
    }

    // an encoding is accepted when converting the string to itself via iconv()
    // leaves the bytes unchanged (compared through their md5 hashes)
    $md5 = md5($str);
    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
3908
3909
  /**
3910
   * Check if the string ends with the given substring.
3911
   *
3912
   * @param string $haystack <p>The string to search in.</p>
3913
   * @param string $needle   <p>The substring to search for.</p>
3914
   *
3915
   * @return bool
3916
   */
3917 2
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // byte-wise comparison of the tail, which is sufficient for an exact match
    $tail = substr($haystack, -strlen($needle));

    return $tail === $needle;
  }
3932
3933
  /**
3934
   * Check if the string ends with the given substring, case insensitive.
3935
   *
3936
   * @param string $haystack <p>The string to search in.</p>
3937
   * @param string $needle   <p>The substring to search for.</p>
3938
   *
3939
   * @return bool
3940
   */
3941 2
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Cut the tail by characters, not by bytes: upper and lower case variants of a
    // character can differ in byte length, so a byte-based cut could split a
    // multi-byte character and make the comparison fail.
    $needleLength = (int)self::strlen($needle);
    $tail = (string)self::substr($haystack, -$needleLength, null);

    return self::strcasecmp($tail, $needle) === 0;
  }
3956
3957
  /**
3958
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
3959
   *
3960
   * @link  http://php.net/manual/en/function.str-ireplace.php
3961
   *
3962
   * @param mixed $search  <p>
3963
   *                       Every replacement with search array is
3964
   *                       performed on the result of previous replacement.
3965
   *                       </p>
3966
   * @param mixed $replace <p>
3967
   *                       </p>
3968
   * @param mixed $subject <p>
3969
   *                       If subject is an array, then the search and
3970
   *                       replace is performed with every entry of
3971
   *                       subject, and the return value is an array as
3972
   *                       well.
3973
   *                       </p>
3974
   * @param int   $count   [optional] <p>
3975
   *                       The number of matched and replaced needles will
3976
   *                       be returned in count which is passed by
3977
   *                       reference.
3978
   *                       </p>
3979
   *
3980
   * @return mixed <p>A string or an array of replacements.</p>
3981
   */
3982 26
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive, unicode-aware pattern per needle
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // Like in str_ireplace(), replacements are literal text: escape "\" and "$",
    // otherwise preg_replace() would read "$1" or "\1" as back-references.
    if (is_array($replace) === true) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    // $count is filled by reference with the number of replacements done
    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
4000
4001
  /**
4002
   * Check if the string starts with the given substring, case insensitive.
4003
   *
4004
   * @param string $haystack <p>The string to search in.</p>
4005
   * @param string $needle   <p>The substring to search for.</p>
4006
   *
4007
   * @return bool
4008
   */
4009 2 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never counts as a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // "starts with" means: the first case-insensitive hit is at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4024
4025
  /**
4026
   * Limit the number of characters in a string, but also after the next word.
4027
   *
4028
   * @param string $str
4029
   * @param int    $length
4030
   * @param string $strAddOn
4031
   *
4032
   * @return string
4033
   */
4034 1
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough already: nothing to cut, no add-on
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly on a space: cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise cut at the limit and drop the last (possibly broken) word
    $str = (string)self::substr($str, 0, $length);
    $words = explode(' ', $str);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // there was only one word: fall back to a hard cut
    if ($withoutLastWord === '') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4065
4066
  /**
4067
   * Pad a UTF-8 string to given length with another string.
4068
   *
4069
   * @param string $str        <p>The input string.</p>
4070
   * @param int    $pad_length <p>The length of return string.</p>
4071
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4072
   * @param int    $pad_type   [optional] <p>
4073
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4074
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4075
   *                           </p>
4076
   *
4077
   * @return string <strong>Returns the padded string</strong>
4078
   */
4079 2
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // only a positive integer target length that is not shorter than the input can be padded
    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    // an empty pad string can not fill anything (and would cause a division by zero below)
    $ps_length = self::strlen($pad_string);
    if (!$ps_length) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    // one run of pad characters that is long enough for the whole gap,
    // each side takes the prefix it needs
    $filler = str_repeat($pad_string, (int)ceil($diff / $ps_length));

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $preLength = $diff;
        $postLength = 0;
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the rest
        $preLength = (int)floor($diff / 2);
        $postLength = $diff - $preLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $preLength = 0;
        $postLength = $diff;
    }

    $pre = $preLength > 0 ? (string)self::substr($filler, 0, $preLength) : '';
    $post = $postLength > 0 ? (string)self::substr($filler, 0, $postLength) : '';

    return $pre . $str . $post;
  }
4120
4121
  /**
4122
   * Repeat a string.
4123
   *
4124
   * @param string $str        <p>
4125
   *                           The string to be repeated.
4126
   *                           </p>
4127
   * @param int    $multiplier <p>
4128
   *                           Number of time the input string should be
4129
   *                           repeated.
4130
   *                           </p>
4131
   *                           <p>
4132
   *                           multiplier has to be greater than or equal to 0.
4133
   *                           If the multiplier is set to 0, the function
4134
   *                           will return an empty string.
4135
   *                           </p>
4136
   *
4137
   * @return string <p>The repeated string.</p>
4138
   */
4139 1
  public static function str_repeat($str, $multiplier)
  {
    // the input goes through self::filter() first, the native function does the repeating
    return str_repeat(self::filter($str), $multiplier);
  }
4145
4146
  /**
4147
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
4148
   *
4149
   * Replace all occurrences of the search string with the replacement string
4150
   *
4151
   * @link http://php.net/manual/en/function.str-replace.php
4152
   *
4153
   * @param mixed $search  <p>
4154
   *                       The value being searched for, otherwise known as the needle.
4155
   *                       An array may be used to designate multiple needles.
4156
   *                       </p>
4157
   * @param mixed $replace <p>
4158
   *                       The replacement value that replaces found search
4159
   *                       values. An array may be used to designate multiple replacements.
4160
   *                       </p>
4161
   * @param mixed $subject <p>
4162
   *                       The string or array being searched and replaced on,
4163
   *                       otherwise known as the haystack.
4164
   *                       </p>
4165
   *                       <p>
4166
   *                       If subject is an array, then the search and
4167
   *                       replace is performed with every entry of
4168
   *                       subject, and the return value is an array as
4169
   *                       well.
4170
   *                       </p>
4171
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4172
   *
4173
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4174
   */
4175 12
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Pure delegation to the native function; $count is forwarded by reference
    // so the caller receives the number of replacements.
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4179
4180
  /**
4181
   * Replace the first "$search"-term with the "$replace"-term.
4182
   *
4183
   * @param string $search
4184
   * @param string $replace
4185
   * @param string $subject
4186
   *
4187
   * @return string
4188
   */
4189 1
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    // No occurrence at all: hand the subject back untouched.
    if ($firstHit === false) {
      return $subject;
    }

    // Replace exactly the matched span; its length is measured with self::strlen().
    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
4199
4200
  /**
4201
   * Shuffles all the characters in the string.
4202
   *
4203
   * @param string $str <p>The input string</p>
4204
   *
4205
   * @return string <p>The shuffled string.</p>
4206
   */
4207 1
  public static function str_shuffle($str)
  {
    // Break the string into pieces via self::split(), shuffle them in place
    // and glue them back together.
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4215
4216
  /**
4217
   * Sort all characters according to code points.
4218
   *
4219
   * @param string $str    <p>A UTF-8 string.</p>
4220
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4221
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4222
   *
4223
   * @return string <p>String of sorted characters.</p>
4224
   */
4225 1
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice collapses repeated values into a single entry.
      $codePoints = array_flip(array_flip($codePoints));
    }

    // Both sorts keep the keys; self::string() only depends on the resulting
    // iteration order.
    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4241
4242
  /**
4243
   * Split a string into an array.
4244
   *
4245
   * @param string $str
4246
   * @param int    $len
4247
   *
4248
   * @return array
4249
   */
4250 23
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return array();
    }

    $len = (int)$len;
    if ($len < 1) {
      // Invalid chunk length: defer to the native function so it reports the
      // problem in its usual way.
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Join every run of $len consecutive clusters into one output element.
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4286
4287
  /**
4288
   * Check if the string starts with the given substring.
4289
   *
4290
   * @param string $haystack <p>The string to search in.</p>
4291
   * @param string $needle   <p>The substring to search for.</p>
4292
   *
4293
   * @return bool
4294
   */
4295 2 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Offset 0 means the needle sits at the very beginning.
    return strpos($haystack, $needle) === 0;
  }
4310
4311
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2, so leading zero bits are dropped. An empty string (or a string made
   * of NUL bytes only) yields "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of "0" and "1" digits.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '0';
    }

    // Convert byte by byte instead of calling base_convert() on the whole hex
    // dump: base_convert() falls back to floating point for large values and
    // silently loses the low bits of anything longer than a few bytes.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // Same shape as a plain base conversion: no leading zeros.
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4326
4327
  /**
4328
   * Convert a string into an array of words.
4329
   *
4330
   * @param string   $str
4331
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
4332
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
4333
   * @param null|int $removeShortValues
4334
   *
4335
   * @return array
4336
   */
4337 10
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    // Empty input: either nothing at all or a single empty element.
    if (!isset($str[0])) {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    // The word pattern is used as a captured delimiter, so the result
    // alternates between "text between words" and the words themselves.
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // No filtering requested: return the raw split result.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = array();
    foreach ($parts as $part) {
      $tooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $blank = $removeEmptyValues === true && trim($part) === '';
      if ($blank) {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4388
4389
  /**
4390
   * alias for "UTF8::to_ascii()"
4391
   *
4392
   * @see UTF8::to_ascii()
4393
   *
4394
   * @param string $str
4395
   * @param string $unknown
4396
   * @param bool   $strict
4397
   *
4398
   * @return string
4399
   */
4400 7
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // Alias: all work is done by self::to_ascii() with the same arguments.
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4404
4405
  /**
4406
   * Counts number of words in the UTF-8 string.
4407
   *
4408
   * @param string $str      <p>The input string.</p>
4409
   * @param int    $format   [optional] <p>
4410
   *                         <strong>0</strong> => return a number of words (default)<br>
4411
   *                         <strong>1</strong> => return an array of words<br>
4412
   *                         <strong>2</strong> => return an array of words with word-offset as key
4413
   *                         </p>
4414
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4415
   *
4416
   * @return array|int <p>The number of words in the string</p>
4417
   */
4418 1
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // self::str_to_words() returns an alternating list: even indexes hold the
    // text between words, odd indexes hold the words.
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    // Format 1: plain list of words.
    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // Format 2: words keyed by their character offset in the input.
    if ($format === 2) {
      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $total; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // Any other format: just the number of words.
    return ($total - 1) / 2;
  }
4448
4449
  /**
4450
   * Case-insensitive string comparison.
4451
   *
4452
   * INFO: Case-insensitive version of UTF8::strcmp()
4453
   *
4454
   * @param string $str1
4455
   * @param string $str2
4456
   *
4457
   * @return int <p>
4458
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
4459
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
4460
   *             <strong>0</strong> if they are equal.
4461
   *             </p>
4462
   */
4463 11
  public static function strcasecmp($str1, $str2)
  {
    // Both operands go through self::strtocasefold(); the comparison itself is
    // delegated to self::strcmp().
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4467
4468
  /**
4469
   * alias for "UTF8::strstr()"
4470
   *
4471
   * @see UTF8::strstr()
4472
   *
4473
   * @param string  $haystack
4474
   * @param string  $needle
4475
   * @param bool    $before_needle
4476
   * @param string  $encoding
4477
   * @param boolean $cleanUtf8
4478
   *
4479
   * @return string|false
4480
   */
4481 1
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Alias: forwards every argument unchanged to self::strstr().
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4485
4486
  /**
4487
   * Case-sensitive string comparison.
4488
   *
4489
   * @param string $str1
4490
   * @param string $str2
4491
   *
4492
   * @return int  <p>
4493
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
4494
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
4495
   *              <strong>0</strong> if they are equal.
4496
   *              </p>
4497
   */
4498 14
  public static function strcmp($str1, $str2)
  {
    // Identical once cast to string: equal, no normalization needed.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the NFD-normalized forms byte-wise.
    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
4506
4507
  /**
4508
   * Find length of initial segment not matching mask.
4509
   *
4510
   * @param string $str
4511
   * @param string $charList
4512
   * @param int    $offset
4513
   * @param int    $length
4514
   *
4515
   * @return int|null
4516
   */
4517 15
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    // Force the mask to a string; an empty mask is answered with null.
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // Narrow the subject down first when an offset and/or length was given.
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }
      $str = (string)$slice;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return null;
    }

    // Lazily capture everything in front of the first character from the mask.
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // No mask character present: the whole string is the initial segment.
    return self::strlen($str);
  }
4543
4544
  /**
4545
   * alias for "UTF8::stristr()"
4546
   *
4547
   * @see UTF8::stristr()
4548
   *
4549
   * @param string  $haystack
4550
   * @param string  $needle
4551
   * @param bool    $before_needle
4552
   * @param string  $encoding
4553
   * @param boolean $cleanUtf8
4554
   *
4555
   * @return string|false
4556
   */
4557 1
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Alias: forwards every argument unchanged to self::stristr().
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4561
4562
  /**
4563
   * Create a UTF-8 string from code points.
4564
   *
4565
   * INFO: opposite to UTF8::codepoints()
4566
   *
4567
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4568
   *
4569
   * @return string <p>UTF-8 encoded string.</p>
4570
   */
4571 2
  public static function string(array $array)
  {
    // Turn each code point into its character via self::chr() and append the
    // results in array order.
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4584
4585
  /**
4586
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4587
   *
4588
   * @param string $str <p>The input string.</p>
4589
   *
4590
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4591
   */
4592 3
  public static function string_has_bom($str)
  {
    // The keys of self::$BOM are the BOM byte sequences; the byte lengths
    // stored as values are not needed for this check.
    foreach (array_keys(self::$BOM) as $bom) {
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4602
4603
  /**
4604
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4605
   *
4606
   * @link http://php.net/manual/en/function.strip-tags.php
4607
   *
4608
   * @param string  $str            <p>
4609
   *                                The input string.
4610
   *                                </p>
4611
   * @param string  $allowable_tags [optional] <p>
4612
   *                                You can use the optional second parameter to specify tags which should
4613
   *                                not be stripped.
4614
   *                                </p>
4615
   *                                <p>
4616
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
4617
   *                                can not be changed with allowable_tags.
4618
   *                                </p>
4619
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
4620
   *
4621
   * @return string <p>The stripped string.</p>
4622
   */
4623 2 View Code Duplication
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Optionally run the input through self::clean() before stripping.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4637
4638
  /**
4639
   * Finds position of first occurrence of a string within another, case insensitive.
4640
   *
4641
   * @link http://php.net/manual/en/function.mb-stripos.php
4642
   *
4643
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
4644
   * @param string  $needle    <p>The string to find in haystack.</p>
4645
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
4646
   * @param string  $encoding  [optional] <p>Set the charset.</p>
4647
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4648
   *
4649
   * @return int|false <p>
4650
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
4651
   *                   or false if needle is not found.
4652
   *                   </p>
4653
   */
4654 10
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: a boolean $encoding is only accepted as a fallback for old versions
    // and is treated like 'UTF-8'.
    if ($encoding !== 'UTF-8' && !is_bool($encoding)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings than UTF-8.
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4698
4699
  /**
4700
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4701
   *
4702
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4703
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4704
   * @param bool    $before_needle [optional] <p>
4705
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4706
   *                               haystack before the first occurrence of the needle (excluding the needle).
4707
   *                               </p>
4708
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4709
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
4710
   *
4711
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
4712
   */
4713 17
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty at this point if cleaning removed all of it.
    // Test for emptiness explicitly: a loose "!$needle" is also true for the
    // perfectly valid needle "0" and would return the whole haystack for it.
    if (!isset($needle[0])) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Pure ASCII on both sides: the native function is safe to use.
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
4780
4781
  /**
4782
   * Get the string length, not the byte-length!
4783
   *
4784
   * @link     http://php.net/manual/en/function.mb-strlen.php
4785
   *
4786
   * @param string  $str       <p>The string being checked for length.</p>
4787
   * @param string  $encoding  [optional] <p>Set the charset.</p>
4788
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4789
   *
4790
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4791
   *             character counted as +1)</p>
4792
   */
4793 88
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    // INFO: a boolean $encoding is only accepted as a fallback for old versions
    // and is treated like 'UTF-8'.
    if ($encoding !== 'UTF-8' && !is_bool($encoding)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // These encodings are answered with a byte count.
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $plainStrlenIsFine = $encoding === 'CP850'
                             && self::$SUPPORT['mbstring_func_overload'] === false;
        if ($plainStrlenIsFine) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str.
      $str = self::clean($str);
    }

    $hasMbstring = self::$SUPPORT['mbstring'];
    $hasIconv = self::$SUPPORT['iconv'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false && $hasIconv === false) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Preferred backends, in order: mbstring, then iconv.
    if ($hasMbstring === true) {
      return \mb_strlen($str, $encoding);
    }

    if ($hasIconv === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings than UTF-8.
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
4888
4889
  /**
   * Get string length in byte.
   *
   * Unlike UTF8::strlen() this always counts bytes, not characters.
   *
   * @param string $str
   *
   * @return int
   */
  public static function strlen_in_byte($str)
  {
    // Make sure the feature flags are populated before reading one of them,
    // as the other methods of this class do.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // With "mbstring.func_overload" the plain strlen() is replaced by
      // mb_strlen(), so ask for the 8BIT length explicitly.
      return \mb_strlen($str, '8BIT');
    }

    return \strlen($str);
  }
4906
4907
  /**
4908
   * Case insensitive string comparisons using a "natural order" algorithm.
4909
   *
4910
   * INFO: natural order version of UTF8::strcasecmp()
4911
   *
4912
   * @param string $str1 <p>The first string.</p>
4913
   * @param string $str2 <p>The second string.</p>
4914
   *
4915
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
4916
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
4917
   *             <strong>0</strong> if they are equal
4918
   */
4919 1
  public static function strnatcasecmp($str1, $str2)
  {
    // Both operands go through self::strtocasefold(); the natural-order
    // comparison is delegated to self::strnatcmp().
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4923
4924
  /**
4925
   * String comparisons using a "natural order" algorithm
4926
   *
4927
   * INFO: natural order version of UTF8::strcmp()
4928
   *
4929
   * @link  http://php.net/manual/en/function.strnatcmp.php
4930
   *
4931
   * @param string $str1 <p>The first string.</p>
4932
   * @param string $str2 <p>The second string.</p>
4933
   *
4934
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
4935
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
4936
   *             <strong>0</strong> if they are equal
4937
   */
4938 2
  public static function strnatcmp($str1, $str2)
  {
    // Identical once cast to string: equal, no folding needed.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise both sides go through self::strtonatfold() before the native
    // natural-order comparison.
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4942
4943
  /**
4944
   * Case-insensitive string comparison of the first n characters.
4945
   *
4946
   * @link  http://php.net/manual/en/function.strncasecmp.php
4947
   *
4948
   * @param string $str1 <p>The first string.</p>
4949
   * @param string $str2 <p>The second string.</p>
4950
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4951
   *
4952
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4953
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4954
   *             <strong>0</strong> if they are equal
4955
   */
4956 1
  public static function strncasecmp($str1, $str2, $len)
  {
    // Both operands go through self::strtocasefold(); the length-limited
    // comparison is delegated to self::strncmp().
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4960
4961
  /**
4962
   * String comparison of the first n characters.
4963
   *
4964
   * @link  http://php.net/manual/en/function.strncmp.php
4965
   *
4966
   * @param string $str1 <p>The first string.</p>
4967
   * @param string $str2 <p>The second string.</p>
4968
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4969
   *
4970
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
4971
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
4972
   *             <strong>0</strong> if they are equal
4973
   */
4974 2
  public static function strncmp($str1, $str2, $len)
  {
    // Cut both strings down to their first $len characters via self::substr()
    // (cast to string in case it returns false), then compare those prefixes.
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
4981
4982
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>The characters to look for; this parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      The part of the haystack starting at the first character found,<br>
   *                      or <strong>false</strong> if none is found or one of the inputs is empty.
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // Find the first character that belongs to the list ...
    $found = preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit);
    if (!$found) {
      return false;
    }

    // ... and return everything from its first occurrence onwards.
    return substr($haystack, strpos($haystack, $hit[0]));
  }
5007
5008
  /**
5009
   * Find position of first occurrence of string in a string.
5010
   *
5011
   * @link http://php.net/manual/en/function.mb-strpos.php
5012
   *
5013
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5014
   * @param string  $needle    <p>The string to find in haystack.</p>
5015
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5016
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5017
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5018
   *
5019
   * @return int|false <p>
5020
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
5021
   *                   If needle is not found it returns false.
5022
   *                   </p>
5023
   */
5024 56
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support an integer $needle, so a non-negative
    // integer is converted via self::chr(). This has to happen BEFORE the string
    // cast below, otherwise "(int)$needle === $needle" can never be true.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in / with an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is searched byte-wise with the native function,
    // as long as "mbstring.func_overload" is not active
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // only warn here: the code below still tries its best
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // pure ASCII input: byte positions and character positions are identical
    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the string. Translate it into an
    // absolute character offset first, so that the position returned below is
    // relative to the start of the original haystack and not to the tail slice.
    if ($offset < 0) {
      $offset = (int)self::strlen($haystack) + $offset;
      if ($offset < 0) {
        $offset = 0;
      }
    }

    // drop the first $offset characters, search byte-wise in the remainder
    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position
    // and add the characters that were skipped via $offset
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5153
5154
  /**
   * Returns the part of a string before or after the last occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string that is searched.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return haystack from its beginning up to the last occurrence
   *                              of needle<br>
   *                              <b>false</b> (default): return haystack from the last occurrence of needle
   *                              to its end
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name, everything except 'UTF-8' is passed through
   *                              "UTF8::normalize_encoding()" first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // optional sanitizing of both inputs before they reach "mb_strrchr()"
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_"-function, provided via polyfill if the extension is missing
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5193
5194
  /**
   * Reverses the order of the characters in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The characters of the input, as returned by "UTF8::split()", in reverse sequence.</p>
   */
  public static function strrev($str)
  {
    $input = (string)$str;

    // an empty string has nothing to reverse
    if ($input === '') {
      return '';
    }

    $chars = self::split($input);

    return implode('', array_reverse($chars));
  }
5211
5212
  /**
   * Case-insensitive variant of "strrchr()": returns the part of a string before or after
   * the last occurrence of a needle.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string that is searched.</p>
   * @param string  $needle        <p>The string to search for.</p>
   * @param bool    $before_needle [optional] <p>
   *                               <b>true</b>: return haystack from its beginning up to the last occurrence
   *                               of needle<br>
   *                               <b>false</b> (default): return haystack from the last occurrence of needle
   *                               to its end
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name, everything except 'UTF-8' is passed through
   *                               "UTF8::normalize_encoding()" first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // optional sanitizing of both inputs before they reach "mb_strrichr()"
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5250
5251
  /**
   * Finds the position of the last occurrence of a string, ignoring the case.
   *
   * @param string  $haystack  <p>The string to look in.</p>
   * @param string  $needle    <p>
   *                           The string to look for.<br>
   *                           A non-negative integer is converted into a character via "UTF8::chr()".
   *                           </p>
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>
   *                   false, if the needle is not found or if one of the strings is empty.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // integer needle => character (must be checked before the string cast)
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions,
    //       where this parameter was the "cleanUtf8"-flag
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a "bool"-encoding is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 2nd choice: intl, but the "grapheme_*" functions can't handle other encodings
    $canUseGrapheme = $encoding === 'UTF-8'
                      &&
                      self::$SUPPORT['intl'] === true
                      &&
                      Bootup::is_php('5.4') === true;
    if ($canUseGrapheme) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased variants case-sensitive
    $haystackUpper = self::strtoupper($haystack);
    $needleUpper = self::strtoupper($needle);

    return self::strrpos($haystackUpper, $needleUpper, $offset, $encoding, $cleanUtf8);
  }
5331
5332
  /**
   * Finds the position of the last occurrence of a string in another string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string that is searched for the last occurrence of needle.</p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.<br>
   *                              A non-negative integer is treated as code point and converted via "UTF8::chr()".
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              A positive value starts the search that many characters into the string,
   *                              a negative value stops the search that many characters before the end
   *                              of the string. "null" is handled like 0.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>
   *                   false, if the needle is not found or if one of the strings is empty.
   *                   </p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // integer needle => character (must be checked before the string cast)
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions,
    //       where this parameter was the "cleanUtf8"-flag
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a "bool"-encoding is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // 2nd choice: intl, but the "grapheme_*" functions can't handle other encodings
    $canUseGrapheme = $encoding === 'UTF-8'
                      &&
                      self::$SUPPORT['intl'] === true
                      &&
                      Bootup::is_php('5.4') === true;
    if ($canUseGrapheme) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset !== 0) {
      if ($offset > 0) {
        // skip the first $offset characters, they are added again to the result
        $slice = self::substr($haystack, $offset);
      } else {
        // cut off the last characters, the result stays relative to the start
        $slice = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($slice !== null) {
        $haystack = $slice === false ? '' : (string)$slice;
      }
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position into a character position
    $before = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($before);
  }
5434
5435
  /**
   * Returns the length (in characters) of the leading part of a string that only consists
   * of characters from a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The characters that are allowed in the leading part.</p>
   * @param int    $offset [optional] <p>Passed to "UTF8::substr()" to select the part of the string to examine.</p>
   * @param int    $length [optional] <p>Passed to "UTF8::substr()" to select the part of the string to examine.</p>
   *
   * @return int <p>0, if the (sub-)string or the mask is empty or if the first character is not in the mask.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // only examine the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      $str = $part === false ? '' : (string)$part;
    }

    $str = (string)$str;
    if ($str === '' || !isset($mask[0])) {
      return 0;
    }

    // match the longest run of mask-characters at the very beginning
    $found = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
      return 0;
    }

    return self::strlen($found[0]);
  }
5463
5464
  /**
   * Returns the part of haystack that starts with the first occurrence of needle,
   * or the part in front of it.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      or if one of the strings is empty.
   *                      </p>
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // optional sanitizing of both inputs before they are searched
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl, but the "grapheme_*" functions can't handle other encodings
    $canUseGrapheme = $encoding === 'UTF-8'
                      &&
                      self::$SUPPORT['intl'] === true
                      &&
                      Bootup::is_php('5.4') === true;
    if ($canUseGrapheme) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: capture (non-greedy) everything in front of the first needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    return $before_needle ? $found[1] : self::substr($haystack, self::strlen($found[1]));
  }
5536
5537
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally apply the "caseFolding_full" replacement data (default)<br>
   *                           <b>false</b>, only apply the static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded string, finally passed through "UTF8::strtolower()".</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // the lookup tables are built once per request and then re-used
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5587
5588
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>
   *                               Set the language for special cases: az, el, lt, tr<br>
   *                               This needs "intl" + PHP >= 5.4, otherwise a warning is triggered
   *                               and the parameter is ignored.
   *                               </p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // default case: no language specific handling requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $canTransliterate = self::$SUPPORT['intl'] === true
                        &&
                        Bootup::is_php('5.4') === true;

    if (!$canTransliterate) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // e.g. "tr-Lower", fall back to the generic transliterator if this id is unknown
    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
5645
5646
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and afterwards every run of "\p{Mn}" characters is removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5658
5659
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>
   *                               Set the language for special cases: az, el, lt, tr<br>
   *                               This needs "intl" + PHP >= 5.4, otherwise a warning is triggered
   *                               and the parameter is ignored.
   *                               </p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // e.g. "tr-Upper", fall back to the generic transliterator if this id is unknown
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning names this method (it used to say "strtolower()" by mistake)
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
5715
5716
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>
   *                              The string replacing from.<br>
   *                              A string is split into its characters, an array is used as list as it is.<br>
   *                              If "$to" is omitted: an array is used as "from => to"-pairs
   *                              and a string is removed from "$str".
   *                              </p>
   * @param string|string[] $to   <p>
   *                              The string being translated to.<br>
   *                              A string is split into its characters, an array is used as list as it is.
   *                              </p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to. If from and to have different lengths, the extra entries of
   *                the longer one are ignored.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // translating something into itself changes nothing
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // only strings are split into characters: an array already is a list
      // of values and must not be passed to "str_split()"
      $from = is_array($from) ? array_values($from) : self::str_split($from);
      $to = is_array($to) ? array_values($to) : self::str_split($to);
      $countFrom = count($from);
      $countTo = count($to);

      // both lists must have the same length: ignore the extra entries
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // no pairs at all => nothing to translate
      // (and "array_combine()" fails for empty arrays before PHP 5.4)
      if (count($from) === 0) {
        return $str;
      }

      $from = array_combine($from, $to);
    }

    // a plain string without "$to" is removed from the input
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
5763
5764
  /**
   * Determine the width of a string via "mb_strwidth()".
   *
   * @param string  $str       <p>The string to measure.</p>
   * @param string  $encoding  [optional] <p>Charset of the string, default is UTF-8.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through "UTF8::clean()" before measuring.</p>
   *
   * @return int <p>The value returned by "mb_strwidth()".</p>
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // every charset name other than the default is normalized first
    $charset = ($encoding !== 'UTF-8')
        ? self::normalize_encoding($encoding, 'UTF-8')
        : $encoding;

    // iconv and mbstring do not tolerate invalid byte sequences,
    // so the input is cleaned on request
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // "mb_strwidth()" is provided by mbstring or by a polyfill
    return \mb_strwidth($input, $charset);
  }
5788
5789
  /**
   * Change the case of all keys in an array.
   *
   * @param array $array <p>The array whose keys are converted.</p>
   * @param int   $case  [optional] <p><strong>CASE_LOWER</strong> (default) or
   *                     <strong>CASE_UPPER</strong>; every other value is handled like
   *                     <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>A new array with lower- or upper-cased keys, or false if
   *                     the input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (is_array($array) === false) {
      return false;
    }

    // only an explicit CASE_LOWER lower-cases the keys, anything else upper-cases them
    $toLower = ($case === CASE_LOWER);

    $converted = array();
    foreach ($array as $oldKey => $item) {
      $newKey = $toLower ? self::strtolower($oldKey) : self::strtoupper($oldKey);
      $converted[$newKey] = $item;
    }

    return $converted;
  }
5826
5827
  /**
   * Return a part of a string, counted in characters.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param int     $offset    <p>Position of the first character that is returned.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The requested part of <i>str</i>; an empty string if <i>str</i> is empty
   *                      or <i>length</i> is 0; <b>FALSE</b> if <i>offset</i> is larger than the
   *                      length of <i>str</i>.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // nothing to extract: empty input or an explicit length of zero
    if ($str === '' || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    $untilEnd = ($length === null);

    // neither offset nor length: the complete string was requested
    if (!$offset && $untilEnd) {
      return $str;
    }

    // the character count is only needed for the offset check and as default length
    $str_length = ($offset || $untilEnd) ? (int)self::strlen($str, $encoding) : 0;

    // an offset behind the end of the string can not be served
    if ($offset && $offset > $str_length) {
      return false;
    }

    $length = $untilEnd ? $str_length : (int)$length;

    // INFO: a boolean encoding is only accepted as fallback for old versions
    if ($encoding !== 'UTF-8' && is_bool($encoding) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut with the native function, as long as mbstring does not overload it
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // second choice: intl, which is only used for UTF-8 here
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_substr($str, $offset, $length);
    }

    // third choice: iconv, "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // pure ASCII can be handled by the native byte-wise function
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php: split into characters, slice, and join again
    $characters = self::split($str);

    return implode('', array_slice($characters, $offset, $length));
  }
5949
5950
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is reduced to that part via "UTF8::substr()" and
   * $str2 is shortened to the character count of that part before both are compared.
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison, passed to
   *                                    "UTF8::substr()".</p>
   * @param int     $length             [optional] <p>The length of the comparison, passed to
   *                                    "UTF8::substr()".</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, "UTF8::strcasecmp()" is
   *                                    used instead of "UTF8::strcmp()".</p>
   *
   * @return int <p>The result of "UTF8::strcmp()" or "UTF8::strcasecmp()" for the compared parts.</p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $hasWindow = ($offset !== 0 || $length !== null);

    if ($hasWindow === true) {
      // cut the requested window out of the main string,
      // a failed cut (false) is handled like an empty string
      $window = self::substr($str1, $offset, $length);
      $str1 = ($window === false) ? '' : (string)$window;

      // the secondary string is limited to the size of this window
      $counterpart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($counterpart === false) ? '' : (string)$counterpart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
5994
5995
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If only an offset is given, the length of the haystack
   *                           (measured in the given encoding) is used.
   *                           </p>
   * @param string  $encoding  <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure with the same encoding that is used for the cut below,
        // otherwise the default length can be wrong for non UTF-8 input
        $length = (int)self::strlen($haystack, $encoding);
      }

      $offset = (int)$offset;
      $length = (int)$length;

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // only the requested part of the haystack is searched,
      // a failed cut (false) is handled like an empty haystack
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would lead to wrong results in the functions below
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // a needle that is empty after cleaning is handled like an empty needle above
      if (!isset($needle[0])) {
        return false;
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6084
6085
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // an empty needle or a missing prefix leaves the haystack untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    // "UTF8::substr()" can return false, which is mapped to an empty string
    return ($remainder === false) ? '' : (string)$remainder;
  }
6117
6118
  /**
   * Remove a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // an empty needle or a missing suffix leaves the haystack untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keep);

    // "UTF8::substr()" can return false, which is mapped to an empty string
    return ($head === false) ? '' : (string)$head;
  }
6150
6151
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // an empty needle or a missing prefix leaves the haystack untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    // "UTF8::substr()" can return false, which is mapped to an empty string
    return ($remainder === false) ? '' : (string)$remainder;
  }
6183
6184
  /**
   * Replace text within a portion of a string, counted in characters.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If the offset is positive, the replacing begins at that offset
   *                                          in the string.
   *                                          <br><br>
   *                                          If the offset is negative, the replacing begins at that character
   *                                          counted from the end of the string.
   *                                          </p>
   * @param int|int[]|void  $length           [optional] <p>If given and positive, it is the length of the
   *                                          portion of the string which is replaced. If it is negative, it
   *                                          is the number of characters from the end of the string at which
   *                                          the replacing stops. If it is not given for a single string, the
   *                                          replacing ends at the end of the string. A length of zero inserts
   *                                          the replacement at the given offset. For an array of strings, a
   *                                          missing length is filled with 0 for every element.</p>
   *
   * @return string|string[] <p>The result string is returned. If the input is an array, an array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement: one entry per input string
      $replacement = (is_array($replacement) === true)
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      // the offset: one entry per input string, non-integer entries become 0
      if (is_array($offset) === true) {
        $offset = array_map(
            function ($singleOffset) {
              return (int)$singleOffset === $singleOffset ? $singleOffset : 0;
            },
            array_slice($offset, 0, $num)
        );
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length: one entry per input string
      if ($length === null) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_map(
            function ($singleLength) use ($num) {
              // null entries become 0, other non-integer entries become the number of input strings
              if ($singleLength === null) {
                return 0;
              }

              return (int)$singleLength === $singleLength ? $singleLength : $num;
            },
            array_slice($length, 0, $num)
        );
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call, once per input string
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first entry of a replacement array
    if (is_array($replacement) === true) {
      $replacement = (count($replacement) > 0) ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // pure ASCII input is handled by the native function
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the character lists
    preg_match_all('/./us', $str, $strChars);
    preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return implode('', $strChars[0]);
  }
6285
6286
  /**
   * Remove a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // an empty needle or a missing suffix leaves the haystack untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keep);

    // "UTF8::substr()" can return false, which is mapped to an empty string
    return ($head === false) ? '' : (string)$head;
  }
6317
6318
  /**
   * Swap the case of every non-whitespace character of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is matched with the "u" modifier
      $str = self::clean($str);
    }

    // a character that is unchanged by upper-casing is lower-cased,
    // every other character is replaced by its upper-case form
    $swapSingleChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return ($char === $upper) ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapSingleChar, $str);
  }
6361
6362
  /**
   * Deprecated alias, forwards all arguments to "UTF8::to_ascii()".
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed as "unknown"-character to "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed as "strict"-flag to "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6379
6380
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_iso8859()".
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6395
6396
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_latin1()".
   *
   * @see        UTF8::to_latin1()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6411
6412
  /**
   * Deprecated alias, forwards the argument to "UTF8::to_utf8()".
   *
   * @see        UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6427
6428
  /**
   * Convert a string into ASCII.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Replacement for characters without an ASCII mapping. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl (only if intl is
   *                        available and PHP >= 5.4) | WARNING: bad performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // static cache of the mapping tables, indexed by "code point >> 8" (the "bank")
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        // "transliterator_transliterate()" returns false on failure,
        // in that case the un-transliterated string is kept for the table lookup below
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // the bytes 0xFE and 0xFF can not start a character
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // the code point is calculated again for every character,
      // a value from a previous iteration must never be re-used
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2 bytes
        $ordC1 = self::$ORD[$c[1]];
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3 bytes
        $ordC1 = self::$ORD[$c[1]];
        $ordC2 = self::$ORD[$c[2]];
        $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4 bytes
        $ordC1 = self::$ORD[$c[1]];
        $ordC2 = self::$ORD[$c[2]];
        $ordC3 = self::$ORD[$c[3]];
        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5 bytes
        $ordC1 = self::$ORD[$c[1]];
        $ordC2 = self::$ORD[$c[2]];
        $ordC3 = self::$ORD[$c[3]];
        $ordC4 = self::$ORD[$c[4]];
        $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        // 6 bytes
        $ordC1 = self::$ORD[$c[1]];
        $ordC2 = self::$ORD[$c[2]];
        $ordC3 = self::$ORD[$c[3]];
        $ordC4 = self::$ORD[$c[4]];
        $ordC5 = self::$ORD[$c[5]];
        $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
      }

      // no valid lead byte (e.g. a stray continuation byte): no code point
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the mapping table, the lower 8 bits the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember missing tables too, so they are requested only once
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no mapping for this character
        $c = $unknown;
      }
    }
    // break the reference of the by-reference loop variable
    unset($c);

    return implode('', $chars);
  }
6592
6593
  /**
   * Convert a UTF-8 string, or every element of an array, into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are walked recursively; keys and element order are kept.
   *
   * @param string|string[] $str <p>The input string or an array of input strings.</p>
   *
   * @return string|string[] <p>The converted string, or the array with every element converted.</p>
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) !== true) {
      $input = (string)$str;

      // an empty string has nothing to decode
      return isset($input[0]) ? self::utf8_decode($input) : '';
    }

    $converted = array();
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_iso8859($value);
    }

    return $converted;
  }
6622
6623
  /**
   * Alias for "UTF8::to_iso8859()": the call is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of input strings.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns for the same input.</p>
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
6636
6637
  /**
   * Leave byte sequences that already look like UTF-8 alone and convert every other non-ASCII byte to UTF-8.
   *
   * <ul>
   * <li>A lead byte followed by the announced number of continuation bytes (0x80-0xBF) is copied unchanged.</li>
   * <li>Any other byte >= 0x80 is converted on its own via "to_utf8_convert()" (WINDOWS-1252 / ISO-8859).</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded afterwards.</li>
   * <li>WARNING: Only the shape of a sequence is checked, so invalid UTF-8 is not removed; use "UTF8::clean()"
   * for that.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array (arrays are processed recursively).</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str) === true) {
      $converted = array();
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';

    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      // bytes below 0x80 (top bits are neither "11" nor "10") need no conversion
      if ($lead < "\xC0" && ($lead & "\xC0") !== "\x80") {
        $out .= $lead;
        continue;
      }

      // number of continuation bytes this lead byte announces
      if ($lead < "\xC0" || $lead > "\xF7") {
        $expected = 0; // stray continuation byte (0x80-0xBF) or 0xF8-0xFF: never kept as UTF-8
      } elseif ($lead <= "\xDF") {
        $expected = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $expected = 2; // looks like 3 bytes UTF8
      } else {
        $expected = 3; // looks like 4 bytes UTF8
      }

      // collect the continuation bytes; a byte past the end of the string counts as "\x00"
      $tail = '';
      for ($offset = 1; $offset <= $expected; $offset++) {
        $next = $pos + $offset >= $byteCount ? "\x00" : $str[$pos + $offset];
        if ($next < "\x80" || $next > "\xBF") {
          break;
        }
        $tail .= $next;
      }

      if ($expected > 0 && $offset > $expected) {
        // yeah, almost sure it's UTF8 already
        $out .= $lead . $tail;
        $pos += $expected;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are examined on their own
        $out .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $out = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($hit) {
          return \mb_convert_encoding(pack('H*', $hit[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
6749
6750
  /**
   * Convert one single byte into a UTF-8 byte sequence.
   *
   * The byte is looked up in the "win1252_to_utf8" table first; if it has no entry there,
   * a two-byte sequence (110xxxxx 10xxxxxx) is built from its value.
   *
   * @param string $int <p>Exactly one byte. The name is historical: "to_utf8()" passes a one-byte string.</p>
   *
   * @return string <p>The UTF-8 byte sequence for the given byte.</p>
   */
  private static function to_utf8_convert($int)
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return self::$WIN1252_TO_UTF8[$ordC1];
    }

    // ">> 6" keeps the array key an integer; "/ 64" yields a float whose implicit
    // truncation as array key is deprecated since PHP 8.1
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
6783
6784
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function if the string and the chars were pure 7-bit ASCII,
   * but checking for that costs more time than it would save here.
   *
   * Without $chars, every leading and trailing character of the Unicode categories
   * "Separator" (\pZ) and "Other" (\pC) is removed. With $chars, the work is delegated to
   * "UTF8::ltrim()" and "UTF8::rtrim()".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is falsy in PHP but a perfectly valid character to strip,
    // so it must not be mistaken for "no chars given".
    $noCharsGiven = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharsGiven) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6812
6813
  /**
   * Makes string's first char uppercase and leaves the rest of the string untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character; "substr()" may hand back false
    $tail = self::substr($str, 1, null, $encoding);

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
6843
6844
  /**
   * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6859
6860
  /**
   * Uppercase the first character of all words in the string.
   *
   * Plain ASCII input without exceptions and without a charlist is handed to PHP's native "ucwords()".
   * Everything else is split via "UTF8::str_to_words()" and each piece that is not listed in
   * $exceptions goes through "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // test for "empty" by length: the string "0" is falsy in PHP but must be returned as "0"
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // compare with '' instead of casting to bool, so that a charlist / exception of "0" is not ignored
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty pieces only; a piece that is exactly "0" has to stay in the output
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
6928
6929
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each pass runs "to_utf8()", "html_entity_decode()", PHP's "urldecode()" and "fix_simple_utf8()",
   * in that order.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a pass changes nothing anymore.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced on PHP >= 5.4
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6979
6980
  /**
   * Return an array that maps "urlencoded" WINDOWS-1252 bytes ("%20" - "%FF") to UTF-8 characters.
   *
   * NOTE(review): the values for '%7F', '%81', '%8D', '%8F', '%90', '%9D', '%A0' and '%AD' are
   * empty strings in this table - confirm that no invisible character was intended there.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Percent-encoded byte (upper-case hex) as key, UTF-8 string as value.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€', // 0x80 is the Euro sign in WINDOWS-1252 (was wrongly mapped to a backtick)
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7216
7217
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The string is rewritten in place, byte by byte: two-byte sequences become one byte
   * (or "?" if the code point is >= 256), three- and four-byte sequences become "?",
   * every other byte is copied.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true and the decoded result is not shorter (measured with "UTF8::strlen()")
   *                              than the string before decoding, that undecoded string is returned instead.
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode($str, $keepUtf8Chars = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): this replaces the table's keys with its values. "to_utf8_convert()" indexes the
    // same table by byte ordinal - confirm that searching for the keys is the intended direction here.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $len = self::strlen_in_byte($str);

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // $i is the read position, $j the write position; $j never overtakes $i
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // two-byte lead at the very end of the string: there is no second byte to read
          if ($i + 1 >= $len) {
            $str[$j] = $noCharFound;
            break;
          }

          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          ++$i;
          // no break: a four-byte sequence skips one byte more, then shares the three-byte handling
        case "\xE0":
          $str[$j] = $noCharFound;
          $i += 2;
          break;

        default:
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes were (re)written, the rest is left-over input
    $return = (string)self::substr($str, 0, $j, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= self::strlen($str_backup)
    ) {
      return $str_backup;
    }

    return $return;
  }
7301
7302
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * PHP's "\utf8_encode()" does the main work. If its result contains the byte 0xC2,
   * the keys of the "win1252_to_utf8" table are replaced by their values afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    $input = (string)$str;

    if (!isset($input[0])) {
      return '';
    }

    $encoded = \utf8_encode($input);

    // the polyfill maybe return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a 0xC2 byte the table replacement is skipped
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // build the search / replace lists once per request
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
      }

      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    return str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7345
7346
  /**
   * Fix utf8-win1252 chars; the call is forwarded unchanged to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7359
7360
  /**
   * Returns the class' table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7376
7377
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string has more words than $limit,
   * it is cut after the last allowed word, right-trimmed and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer; values below 1 give an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $maxWords = (int)$limit;

    if (!isset($str[0]) || $maxWords < 1) {
      return '';
    }

    preg_match('/^\s*+(?:\S++\s*+){1,' . $maxWords . '}/u', $str, $found);

    // something was cut off only if the match is shorter than the whole string
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
7413
7414
  /**
   * Wraps a string to a given number of characters
   *
   * The string is translated into a single-byte "mask" (one byte per character: ' ' for a space,
   * '#' for an existing break, '?' for anything else). PHP's native "wordwrap()" wraps that mask,
   * and the positions of the '#' marks are then used to rebuild the result from the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $lineNo => $line) {

      // every line but the first is preceded by an already existing break
      if ($lineNo > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= $char === ' ' ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;  // next entry of $chars that was not emitted yet
    $maskIndex = -1; // last position of the mask that was consumed
    $breakPos = -1;  // position of the current '#' in the mask

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {

      // copy the characters that belong in front of this break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break replaces an original break or the space it was wrapped at
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left follows the last break
    return $wrapped . implode('', $chars);
  }
7481
7482
  /**
   * Returns the class' array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7491
7492
}
7493