Completed
Push — master ( e02fc9...00e11d )
by Lars
07:49
created

UTF8::str_to_words()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 11
ccs 0
cts 0
cp 0
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 2
crap 6
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
   * Creates an instance and triggers the environment detection.
   *
   * The constructor does nothing except delegate to UTF8::checkForSupport().
   */
  public function __construct()
  {
    static::checkForSupport();
  }
816
817
  /**
   * Fetch one character of a string by its position, similar to $str[1].
   *
   * Delegates to UTF8::substr() with a fixed length of one character.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    $length = 1;

    return self::substr($str, $pos, $length);
  }
829
830
  /**
   * Returns the string with the UTF-8 BOM in front of it.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is
   *       handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // nothing to do when a BOM was already detected
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
847
848
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are read most-significant-bit first. When the number of digits is
   * not a multiple of eight, the input is left-padded with zeros, so "1100001"
   * and "01100001" both produce "a".
   *
   * INFO: The conversion is done byte by byte, so the input may be of any length
   *       (a single base_convert() call loses precision on large numbers) and a
   *       value such as "1010" yields "\x0a" (packing an odd number of hex digits
   *       would pad on the wrong side and yield "\xa0").
   *
   * @param mixed $bin <p>The binary digits, e.g. "0110100001101001"; every other character is ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if the input contains no binary digit.</p>
   */
  public static function binary_to_str($bin)
  {
    // base_convert() used to skip everything that is not a valid digit; keep doing that
    $bits = preg_replace('/[^01]/', '', (string)$bin);
    if ($bits === null || $bits === '') {
      return '';
    }

    // left-pad to whole bytes
    $remainder = strlen($bits) % 8;
    if ($remainder !== 0) {
      $bits = str_repeat('0', 8 - $remainder) . $bits;
    }

    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      $str .= chr(bindec($byte));
    }

    return $str;
  }
859
860 2
  /**
   * Provides the Byte Order Mark of UTF-8 (the three bytes EF BB BF).
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
869
870
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
883
884 2
  /**
   * Detects once which UTF-8 related features the server environment offers
   * and stores the results in self::$support.
   *
   * INFO: There is no need to call this manually, it is triggered on demand.
   */
  public static function checkForSupport()
  {
    // the detection already ran -> nothing to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // set the marker first, then collect the individual results
    self::$support['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
911 1
912
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Only real integers inside the Unicode code space (0 - 0x10FFFF) are
   * accepted. Anything else (numeric strings, floats, negative or too large
   * values) results in null, independent of whether "IntlChar" or the
   * pure-PHP fallback does the encoding.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure to encode.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // strict check: the cast must not change type or value
    if ((int)$code_point !== $code_point) {
      return null;
    }

    // nothing outside of the Unicode code space can be encoded; without this
    // check the fallback below would emit invalid byte sequences
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();

    // the separator keeps the key unambiguous for numeric encoding names like "850"
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($cache[$cacheKey]) === true) {
      return $cache[$cacheKey];
    }

    if ($code_point < 0x80) {
      // 1 byte: 0xxxxxxx
      $str = chr($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = chr(0xC0 | ($code_point >> 6)) .
             chr(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = chr(0xE0 | ($code_point >> 12)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = chr(0xF0 | ($code_point >> 18)) .
             chr(0x80 | (($code_point >> 12) & 0x3F)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $cache[$cacheKey] = $str;

    return $str;
  }
971
972
  /**
   * Runs a callback on every character of a string.
   *
   * The string is broken up via UTF8::split() and the pieces are passed to
   * array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
986
987
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, empty for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // compare against the empty string explicitly: a loose check would also
    // swallow the valid input "0"
    if ($str === '') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1007
1008
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are read; each following byte
   * contributes its value with the top bit cleared.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $codePoint = self::ord($char[0]);

    // 0xxxxxxx: the top bit is clear, the byte value is returned as-is
    if (($codePoint & 0x80) === 0) {
      return $codePoint;
    }

    $length = 1;

    if (($codePoint & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $codePoint &= ~0xc0;
    } elseif (($codePoint & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $codePoint &= ~0xe0;
    } elseif (($codePoint & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $codePoint &= ~0xf0;
    }

    // 10xxxxxx: shift in six bits per following byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $codePoint = ($codePoint << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $codePoint;
  }
1047
1048
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Passed through to UTF8::int_to_hex() as its second argument.</p>
   *
   * @return string <p>The code point encoded as U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1060
1061
  /**
   * Splits a string into smaller chunks and joins them again with the specified line ending character(s).
   *
   * The separator is placed between the chunks only (implode), not after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1074
1075
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; groups 2 and 3 capture
    // single stray bytes. Only group 1 is written back by the replacement.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $cleaned = preg_replace($utf8Pattern, '$1', $str);
    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // the optional steps run in a fixed order: whitespace, MS Word chars, BOM
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::removeBOM($cleaned);
    }

    return $cleaned;
  }
1123 4
1124
  /**
   * Cleans up a string so that only printable UTF-8 chars remain and fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // clean() is called with these switches:
    //   remove BOM               => true
    //   normalize whitespace     => true
    //   normalize MS Word chars  => false
    //   keep non-breaking spaces => true
    // and always removes non UTF-8 bytes, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    return (string)self::clean($repaired, true, true, false, true);
  }
1151
1152 5
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $className = '\\voku\\helper\\UTF8';

    // a plain string is split first; anything else is mapped as given
    $pieces = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(array($className, 'ord'), $pieces);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array($className, 'int_to_hex'), $codePoints);
  }
1189
1190
  /**
   * Returns count of characters used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split with a chunk length of 1, then tally identical pieces
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1203
1204
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") and then
   * decoded from "HTML-ENTITIES" to UTF-8 via mb_convert_encoding().
   *
   * @param int $code
   *
   * @return string
   */
  public static function decimal_to_chr($code)
  {
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1219 11
1220
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function on a UTF-8 string as well without messing up the string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the (string-cast) input when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // without a detected source encoding, or when the string is already in
    // the target encoding and no conversion is forced, return it unchanged
    if (
        !$encodingDetected
        ||
        (
            $force !== true
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Compare explicitly: a plain truthiness test would also discard the
    // valid conversion result "0" and hand back the unconverted input.
    if ($strEncoded !== false && $strEncoded !== null && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1310
1311
  /**
1312
   * Reads entire file into a string.
1313
   *
1314
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1315
   *
1316
   * @link http://php.net/manual/en/function.file-get-contents.php
1317
   *
1318
   * @param string        $filename      <p>
1319
   *                                     Name of the file to read.
1320
   *                                     </p>
1321
   * @param int|null      $flags         [optional] <p>
1322
   *                                     Prior to PHP 6, this parameter is called
1323
   *                                     use_include_path and is a bool.
1324
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1325
   *                                     to trigger include path
1326
   *                                     search.
1327
   *                                     </p>
1328
   *                                     <p>
1329
   *                                     The value of flags can be any combination of
1330
   *                                     the following flags (with some restrictions), joined with the
1331
   *                                     binary OR (|)
1332
   *                                     operator.
1333
   *                                     </p>
1334
   *                                     <p>
1335
   *                                     <table>
1336
   *                                     Available flags
1337
   *                                     <tr valign="top">
1338
   *                                     <td>Flag</td>
1339
   *                                     <td>Description</td>
1340
   *                                     </tr>
1341
   *                                     <tr valign="top">
1342
   *                                     <td>
1343
   *                                     FILE_USE_INCLUDE_PATH
1344
   *                                     </td>
1345
   *                                     <td>
1346
   *                                     Search for filename in the include directory.
1347
   *                                     See include_path for more
1348
   *                                     information.
1349
   *                                     </td>
1350
   *                                     </tr>
1351
   *                                     <tr valign="top">
1352
   *                                     <td>
1353
   *                                     FILE_TEXT
1354
   *                                     </td>
1355
   *                                     <td>
1356
   *                                     As of PHP 6, the default encoding of the read
1357
   *                                     data is UTF-8. You can specify a different encoding by creating a
1358
   *                                     custom context or by changing the default using
1359
   *                                     stream_default_encoding. This flag cannot be
1360
   *                                     used with FILE_BINARY.
1361
   *                                     </td>
1362 2
   *                                     </tr>
1363
   *                                     <tr valign="top">
1364
   *                                     <td>
1365 2
   *                                     FILE_BINARY
1366 2
   *                                     </td>
1367
   *                                     <td>
1368 2
   *                                     With this flag, the file is read in binary mode. This is the default
1369 2
   *                                     setting and cannot be used with FILE_TEXT.
1370
   *                                     </td>
1371
   *                                     </tr>
1372
   *                                     </table>
1373 2
   *                                     </p>
1374 2
   * @param resource|null $context       [optional] <p>
1375
   *                                     A valid context resource created with
1376 2
   *                                     stream_context_create. If you don't need to use a
1377 2
   *                                     custom context, you can skip this parameter by &null;.
1378
   *                                     </p>
1379 2
   * @param int|null      $offset        [optional] <p>
1380 1
   *                                     The offset where the reading starts.
1381 1
   *                                     </p>
1382 2
   * @param int|null      $maxlen        [optional] <p>
1383
   *                                     Maximum length of data read. The default is to read until end
1384
   *                                     of file is reached.
1385
   *                                     </p>
1386 2
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1387 1
   *
1388
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1389
   *                                     or pdf, because they used non default utf-8 chars</p>
1390 1
   *
1391 1
   * @return string|false <p>The function returns the read data or false on failure.</p>
1392 1
   */
1393 1
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // normalize the arguments
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // build a default http context carrying the timeout, but only when the
    // caller did not bring a context and the timeout is non-zero
    if ($context === null && $timeout) {
      $context = stream_context_create(
          array(
              'http' => array(
                  'timeout' => $timeout,
              ),
          )
      );
    }

    // $maxlen is only forwarded when it is an integer
    $data = is_int($maxlen)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // $force = false: let encode() decide based on the detected encoding
    $data = self::encode('UTF-8', $data, false);

    return self::cleanup($data);
  }
1428 9
1429 2
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (including when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $contents = file_get_contents($file_path);

    // A failed read yields boolean false; report "no BOM" explicitly instead
    // of passing a non-string into string_has_bom().
    if ($contents === false) {
      return false;
    }

    return self::string_has_bom($contents);
  }
1440 8
1441
  /**
   * Normalizes a value with the given Unicode normalization form (default: 4, NFC).
   *
   * Arrays and objects are walked recursively. Strings get "\r\n" and "\r"
   * replaced by "\n"; strings for which is_ascii() reports false are then
   * normalized via \Normalizer, falling back to UTF8::encode('UTF-8', ...)
   * when the Normalizer returns nothing usable. Every other type is
   * returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form
   * @param string $leading_combining <p>Prefix put in front of a string that starts with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    switch (gettype($var)) {
      case 'array':
        foreach ($var as $key => $item) {
          /** @noinspection AlterInForeachInspection */
          $var[$key] = self::filter($item, $normalization_form, $leading_combining);
        }

        return $var;

      case 'object':
        foreach ($var as $property => $item) {
          $var->{$property} = self::filter($item, $normalization_form, $leading_combining);
        }

        return $var;

      case 'string':
        if (strpos($var, "\r") !== false) {
          // Workaround https://bugs.php.net/65732
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
        }

        // skip the normalization unless is_ascii() reports false
        if (self::is_ascii($var) !== false) {
          return $var;
        }

        // stays true when the string is already normalized or when
        // normalize() delivered a non-empty result
        $hasNormalizedForm = true;

        /** @noinspection PhpUndefinedClassInspection */
        if (!\Normalizer::isNormalized($var, $normalization_form)) {
          /** @noinspection PhpUndefinedClassInspection */
          $normalized = \Normalizer::normalize($var, $normalization_form);

          $hasNormalizedForm = isset($normalized[0]);
          $var = $hasNormalizedForm ? $normalized : self::encode('UTF-8', $var);
        }

        if (
            $var[0] >= "\x80"
            &&
            $hasNormalizedForm
            &&
            isset($leading_combining[0])
            &&
            preg_match('/^\p{Mn}/u', $var)
        ) {
          // Prevent leading combining chars
          // for NFC-safe concatenations.
          $var = $leading_combining . $var;
        }

        return $var;
    }

    return $var;
  }
1502
1503
  /**
1504
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1505
   *
1506
   * Gets a specific external variable by name and optionally filters it
1507
   *
1508
   * @link  http://php.net/manual/en/function.filter-input.php
1509
   *
1510
   * @param int    $type          <p>
1511
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1512
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1513
   *                              <b>INPUT_ENV</b>.
1514
   *                              </p>
1515
   * @param string $variable_name <p>
1516
   *                              Name of a variable to get.
1517
   *                              </p>
1518
   * @param int    $filter        [optional] <p>
1519
   *                              The ID of the filter to apply. The
1520 1
   *                              manual page lists the available filters.
1521
   *                              </p>
1522 1
   * @param mixed  $options       [optional] <p>
1523 1
   *                              Associative array of options or bitwise disjunction of flags. If filter
1524 1
   *                              accepts options, flags can be provided in "flags" field of array.
1525 1
   *                              </p>
1526
   *
1527
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1528 1
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1529
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1530
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1531
   * @since 5.2.0
1532
   */
1533 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native function when the caller
    // actually supplied a fourth argument
    $value = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1543 1
1544 1
  /**
1545 1
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1546
   *
1547
   * Gets external variables and optionally filters them
1548 1
   *
1549
   * @link  http://php.net/manual/en/function.filter-input-array.php
1550
   *
1551
   * @param int   $type       <p>
1552
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1553
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1554
   *                          <b>INPUT_ENV</b>.
1555
   *                          </p>
1556
   * @param mixed $definition [optional] <p>
1557
   *                          An array defining the arguments. A valid key is a string
1558
   *                          containing a variable name and a valid value is either a filter type, or an array
1559 1
   *                          optionally specifying the filter, flags and options. If the value is an
1560
   *                          array, valid keys are filter which specifies the
1561 1
   *                          filter type,
1562
   *                          flags which specifies any flags that apply to the
1563
   *                          filter, and options which specifies any options that
1564
   *                          apply to the filter. See the example below for a better understanding.
1565
   *                          </p>
1566
   *                          <p>
1567
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1568
   *                          input array are filtered by this filter.
1569
   *                          </p>
1570
   * @param bool  $add_empty  [optional] <p>
1571
   *                          Add missing keys as <b>NULL</b> to the return value.
1572
   *                          </p>
1573
   *
1574
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1575
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1576
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1577 7
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1578
   * fails.
1579 7
   * @since 5.2.0
1580 7
   */
1581 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with a single argument the native function is called with $type only,
    // otherwise $definition and $add_empty are passed along as well
    $values = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1591 1
1592
  /**
1593 7
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1594
   *
1595
   * Filters a variable with a specified filter
1596
   *
1597
   * @link  http://php.net/manual/en/function.filter-var.php
1598
   *
1599
   * @param mixed $variable <p>
1600
   *                        Value to filter.
1601
   *                        </p>
1602
   * @param int   $filter   [optional] <p>
1603 1
   *                        The ID of the filter to apply. The
1604
   *                        manual page lists the available filters.
1605 1
   *                        </p>
1606
   * @param mixed $options  [optional] <p>
1607 1
   *                        Associative array of options or bitwise disjunction of flags. If filter
1608
   *                        accepts options, flags can be provided in "flags" field of array. For
1609
   *                        the "callback" filter, callable type should be passed. The
1610 1
   *                        callback must accept one argument, the value to be filtered, and return
1611 1
   *                        the value after filtering/sanitizing it.
1612
   *                        </p>
1613 1
   *                        <p>
1614
   *                        <code>
1615
   *                        // for filters that accept options, use this format
1616 1
   *                        $options = array(
1617 1
   *                        'options' => array(
1618 1
   *                        'default' => 3, // value to return if the filter fails
1619 1
   *                        // other options here
1620 1
   *                        'min_range' => 0
1621
   *                        ),
1622 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1623
   *                        );
1624
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1625
   *                        // for filter that only accept flags, you can pass them directly
1626
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1627
   *                        // for filter that only accept flags, you can also pass as an array
1628
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1629
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1630
   *                        // callback validate filter
1631
   *                        function foo($value)
1632 1
   *                        {
1633
   *                        // Expected format: Surname, GivenNames
1634 1
   *                        if (strpos($value, ", ") === false) return false;
1635
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1636
   *                        $empty = (empty($surname) || empty($givennames));
1637
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1638 1
   *                        if ($empty || $notstrings) {
1639
   *                        return false;
1640
   *                        } else {
1641
   *                        return $value;
1642
   *                        }
1643
   *                        }
1644
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1645
   *                        </code>
1646
   *                        </p>
1647
   *
1648
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1649
   * @since 5.2.0
1650
   */
1651 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually passed a third argument,
    // so the native filter_var() keeps its own default otherwise.
    if (3 > func_num_args()) {
      $variable = filter_var($variable, $filter);
    } else {
      $variable = filter_var($variable, $filter, $options);
    }

    // Hand the filtered value to self::filter() for the final clean-up.
    return self::filter($variable);
  }
1661
1662 1
  /**
1663 1
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1664 1
   *
1665 1
   * Gets multiple variables and optionally filters them
1666 1
   *
1667 1
   * @link  http://php.net/manual/en/function.filter-var-array.php
1668 1
   *
1669 1
   * @param array $data       <p>
1670 1
   *                          An array with string keys containing the data to filter.
1671 1
   *                          </p>
1672 1
   * @param mixed $definition [optional] <p>
1673
   *                          An array defining the arguments. A valid key is a string
1674
   *                          containing a variable name and a valid value is either a
1675
   *                          filter type, or an
1676
   *                          array optionally specifying the filter, flags and options.
1677
   *                          If the value is an array, valid keys are filter
1678
   *                          which specifies the filter type,
1679
   *                          flags which specifies any flags that apply to the
1680
   *                          filter, and options which specifies any options that
1681
   *                          apply to the filter. See the example below for a better understanding.
1682
   *                          </p>
1683
   *                          <p>
1684
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1685
   *                          input array are filtered by this filter.
1686
   *                          </p>
1687
   * @param bool  $add_empty  [optional] <p>
1688
   *                          Add missing keys as <b>NULL</b> to the return value.
1689
   *                          </p>
1690
   *
1691
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1692 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1693 1
   * the variable is not set.
1694
   * @since 5.2.0
1695
   */
1696 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument, let the native function use its own defaults
    // instead of forwarding this wrapper's placeholder values.
    if (2 > func_num_args()) {
      $a = filter_var_array($data);
    } else {
      $a = filter_var_array($data, $definition, $add_empty);
    }

    // Hand the filtered result to self::filter() for the final clean-up.
    return self::filter($a);
  }
1706
1707
  /**
1708
   * Check if the number of unicode characters is not more than the specified integer.
1709
   *
1710
   * @param string $str      The original string to be checked.
1711
   * @param int    $box_size The size in number of chars to be checked against string.
1712
   *
1713
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1714
   */
1715
  public static function fits_inside($str, $box_size)
  {
    // Compare the length reported by self::strlen() (not the native byte
    // count) against the allowed size.
    return (self::strlen($str) <= $box_size);
  }
1719
1720
  /**
1721
   * Try to fix simple broken UTF-8 strings.
1722
   *
1723
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1724
   *
1725
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
1726
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1727
   * See: http://en.wikipedia.org/wiki/Windows-1252
1728
   *
1729
   * @param string $str <p>The input string</p>
1730
   *
1731
   * @return string
1732
   */
1733 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The search/replace lists are derived from self::$brokenUtf8ToUtf8 once
    // per request and kept in static locals for subsequent calls.
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {
      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = array_keys(self::$brokenUtf8ToUtf8);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = array_values(self::$brokenUtf8ToUtf8);
    }

    // Replace every key of the map by its corresponding value.
    return str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1752 1
1753
  /**
1754 1
   * Fix a double (or multiple) encoded UTF8 string.
1755 1
   *
1756
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1757 1
   *
1758
   * @return mixed
1759
   */
1760
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element (recursively), keeping their keys.
    if (is_array($str)) {

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::fix_utf8($v);
      }

      return $str;
    }

    // Repeat the decode/re-encode round trip until the value no longer
    // changes, so that several layers of encoding are peeled off.
    $last = '';
    while ($last !== $str) {
      $last = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
1782
1783
  /**
1784
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1785
   *
1786 1
   * @param string $char
1787
   *
1788 1
   * @return string <p>'RTL' or 'LTR'</p>
1789 1
   */
1790
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: ask IntlChar when it is flagged as available.
    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      $charDirection = array(
          'RTL' => array(1, 13, 14, 15, 21),
          'LTR' => array(0, 11, 12, 20),
      );

      if (in_array($tmpReturn, $charDirection['LTR'], true)) {
        return 'LTR';
      } elseif (in_array($tmpReturn, $charDirection['RTL'], true)) {
        return 'RTL';
      }

      // Any other direction class falls through to the range table below.
    }

    // The class is final, so "self::" is equivalent to late static binding here.
    $c = self::chr_to_decimal($char);

    // Everything outside of [0x5be, 0x10b7f] is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      if (0x5be === $c ||
          0x5c0 === $c ||
          0x5c3 === $c ||
          0x5c6 === $c ||
          (0x5d0 <= $c && 0x5ea >= $c) ||
          (0x5f0 <= $c && 0x5f4 >= $c) ||
          0x608 === $c ||
          0x60b === $c ||
          0x60d === $c ||
          0x61b === $c ||
          (0x61e <= $c && 0x64a >= $c) ||
          (0x66d <= $c && 0x66f >= $c) ||
          (0x671 <= $c && 0x6d5 >= $c) ||
          (0x6e5 <= $c && 0x6e6 >= $c) ||
          (0x6ee <= $c && 0x6ef >= $c) ||
          (0x6fa <= $c && 0x70d >= $c) ||
          0x710 === $c ||
          (0x712 <= $c && 0x72f >= $c) ||
          (0x74d <= $c && 0x7a5 >= $c) ||
          0x7b1 === $c ||
          (0x7c0 <= $c && 0x7ea >= $c) ||
          (0x7f4 <= $c && 0x7f5 >= $c) ||
          0x7fa === $c ||
          (0x800 <= $c && 0x815 >= $c) ||
          0x81a === $c ||
          0x824 === $c ||
          0x828 === $c ||
          (0x830 <= $c && 0x83e >= $c) ||
          (0x840 <= $c && 0x858 >= $c) ||
          0x85e === $c
      ) {
        return 'RTL';
      }

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      if (0xfb1d === $c ||
          (0xfb1f <= $c && 0xfb28 >= $c) ||
          (0xfb2a <= $c && 0xfb36 >= $c) ||
          (0xfb38 <= $c && 0xfb3c >= $c) ||
          0xfb3e === $c ||
          (0xfb40 <= $c && 0xfb41 >= $c) ||
          (0xfb43 <= $c && 0xfb44 >= $c) ||
          (0xfb46 <= $c && 0xfbc1 >= $c) ||
          (0xfbd3 <= $c && 0xfd3d >= $c) ||
          (0xfd50 <= $c && 0xfd8f >= $c) ||
          (0xfd92 <= $c && 0xfdc7 >= $c) ||
          (0xfdf0 <= $c && 0xfdfc >= $c) ||
          (0xfe70 <= $c && 0xfe74 >= $c) ||
          (0xfe76 <= $c && 0xfefc >= $c) ||
          (0x10800 <= $c && 0x10805 >= $c) ||
          0x10808 === $c ||
          (0x1080a <= $c && 0x10835 >= $c) ||
          (0x10837 <= $c && 0x10838 >= $c) ||
          0x1083c === $c ||
          (0x1083f <= $c && 0x10855 >= $c) ||
          (0x10857 <= $c && 0x1085f >= $c) ||
          (0x10900 <= $c && 0x1091b >= $c) ||
          (0x10920 <= $c && 0x10939 >= $c) ||
          0x1093f === $c ||
          0x10a00 === $c ||
          (0x10a10 <= $c && 0x10a13 >= $c) ||
          (0x10a15 <= $c && 0x10a17 >= $c) ||
          (0x10a19 <= $c && 0x10a33 >= $c) ||
          (0x10a40 <= $c && 0x10a47 >= $c) ||
          (0x10a50 <= $c && 0x10a58 >= $c) ||
          (0x10a60 <= $c && 0x10a7f >= $c) ||
          (0x10b00 <= $c && 0x10b35 >= $c) ||
          (0x10b40 <= $c && 0x10b55 >= $c) ||
          (0x10b58 <= $c && 0x10b72 >= $c) ||
          (0x10b78 <= $c && 0x10b7f >= $c)
      ) {
        return 'RTL';
      }
    }

    // Not listed in any right-to-left range.
    return 'LTR';
  }
1902
1903
  /**
1904
   * Get data from "/data/*.php".
1905
   *
1906
   * @param string $file
1907
   *
1908
   * @return bool|string|array|int <p>Will return false on error.</p>
1909
   */
1910
  private static function getData($file)
  {
    // Data files live next to this class as "<name>.php" and return their
    // payload from the included script.
    $file = __DIR__ . '/data/' . $file . '.php';
    if (file_exists($file)) {
      /** @noinspection PhpIncludeInspection */
      return require $file;
    }

    // Missing data file.
    return false;
  }
1920
1921
  /**
1922
   * alias for "UTF8::string_has_bom()"
1923
   *
1924
   * @see UTF8::string_has_bom()
1925
   *
1926 9
   * @param string $str
1927
   *
1928 9
   * @return bool
1929
   */
1930 9
  public static function hasBom($str)
  {
    // Pure alias, kept for naming compatibility.
    return self::string_has_bom($str);
  }
1934 9
1935 7
  /**
1936
   * Converts hexadecimal U+xxxx code point representation to integer.
1937
   *
1938
   * INFO: opposite to UTF8::int_to_hex()
1939 9
   *
1940 9
   * @param string $str <p>The hexadecimal code point representation.</p>
1941
   *
1942 9
   * @return int|false <p>The code point, or false on failure.</p>
1943 9
   */
1944 9
  public static function hex_to_int($str)
  {
    // Empty input (and other falsy values) cannot be a code point notation.
    if (!$str) {
      return false;
    }

    // Accepts "\uXXXX", "U+XXXX" or bare "XXXX" with 4 to 6 hexadecimal digits.
    // Only real hex digits are allowed: with a broader class, intval(..., 16)
    // would silently turn input such as "wxyz" into 0 instead of failing.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1956 4
1957 4
  /**
1958
   * alias for "UTF8::html_entity_decode()"
1959
   *
1960 4
   * @see UTF8::html_entity_decode()
1961
   *
1962
   * @param string $str
1963 9
   * @param int    $flags
1964
   * @param string $encoding
1965 9
   *
1966 9
   * @return string
1967
   */
1968 7
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Pure alias, all arguments are forwarded unchanged.
    return self::html_entity_decode($str, $flags, $encoding);
  }
1972
1973 4
  /**
1974
   * Converts a UTF-8 string to a series of HTML numbered entities.
1975 9
   *
1976
   * INFO: opposite to UTF8::html_decode()
1977 9
   *
1978
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1979
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1980 9
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1981 9
   *
1982 9
   * @return string <p>HTML numbered entities.</p>
1983
   */
1984 9
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // Start at 0x80 to leave ASCII untouched, or at 0x00 to encode everything.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The conversion map is a list of 4-tuples (start, end, offset, mask).
      // It must contain a multiple of four elements; PHP 8 throws a
      // ValueError otherwise.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2022
2023
  /**
2024
   * UTF-8 version of html_entity_decode()
2025
   *
2026
   * The reason we are not using html_entity_decode() by itself is because
2027
   * while it is not technically correct to leave out the semicolon
2028
   * at the end of an entity most browsers will still interpret the entity
2029
   * correctly. html_entity_decode() does not convert entities without
2030
   * semicolons, so we are left with our own little solution here. Bummer.
2031
   *
2032
   * Convert all HTML entities to their applicable characters
2033
   *
2034
   * INFO: opposite to UTF8::html_encode()
2035
   *
2036
   * @link http://php.net/manual/en/function.html-entity-decode.php
2037
   *
2038
   * @param string $str      <p>
2039
   *                         The input string.
2040
   *                         </p>
2041
   * @param int    $flags    [optional] <p>
2042
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2043
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2044
   *                         <table>
2045
   *                         Available <i>flags</i> constants
2046
   *                         <tr valign="top">
2047
   *                         <td>Constant Name</td>
2048
   *                         <td>Description</td>
2049
   *                         </tr>
2050
   *                         <tr valign="top">
2051
   *                         <td><b>ENT_COMPAT</b></td>
2052
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2053
   *                         </tr>
2054
   *                         <tr valign="top">
2055
   *                         <td><b>ENT_QUOTES</b></td>
2056
   *                         <td>Will convert both double and single quotes.</td>
2057
   *                         </tr>
2058
   *                         <tr valign="top">
2059
   *                         <td><b>ENT_NOQUOTES</b></td>
2060
   *                         <td>Will leave both double and single quotes unconverted.</td>
2061
   *                         </tr>
2062
   *                         <tr valign="top">
2063
   *                         <td><b>ENT_HTML401</b></td>
2064
   *                         <td>
2065
   *                         Handle code as HTML 4.01.
2066
   *                         </td>
2067
   *                         </tr>
2068
   *                         <tr valign="top">
2069
   *                         <td><b>ENT_XML1</b></td>
2070
   *                         <td>
2071
   *                         Handle code as XML 1.
2072
   *                         </td>
2073
   *                         </tr>
2074
   *                         <tr valign="top">
2075
   *                         <td><b>ENT_XHTML</b></td>
2076
   *                         <td>
2077
   *                         Handle code as XHTML.
2078
   *                         </td>
2079
   *                         </tr>
2080
   *                         <tr valign="top">
2081
   *                         <td><b>ENT_HTML5</b></td>
2082
   *                         <td>
2083
   *                         Handle code as HTML 5.
2084
   *                         </td>
2085
   *                         </tr>
2086
   *                         </table>
2087
   *                         </p>
2088
   * @param string $encoding [optional] <p>Encoding to use.</p>
2089
   *
2090
   * @return string <p>The decoded string.</p>
2091
   */
2092
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Strings shorter than four bytes are returned untouched.
    if (!isset($str[3])) {
      return $str;
    }

    // Nothing to do without an ampersand, or when the string contains neither
    // a numeric-entity prefix ("&#") nor any semicolon.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // Default flags: add ENT_HTML5 when Bootup reports PHP 5.4 support.
    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_COMPAT | ENT_HTML5;
      } else {
        $flags = ENT_COMPAT;
      }
    }

    // Decode repeatedly until a pass no longer changes the string, so that
    // nested (multiply encoded) entities are resolved as well.
    do {
      $str_compare = $str;

      // Step 1: decimal entities via mbstring. Entities that decode to a quote
      // character are left as they are, so that the quote handling stays with
      // html_entity_decode() and $flags in step 2.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            } else {
              return $matches[0];
            }
          },
          $str
      );

      // Step 2: append the missing semicolon to numeric entities written
      // without one, then let the native function do the decoding.
      // decode numeric & UTF16 two byte entities
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2157
2158
  /**
2159
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2160
   *
2161
   * @link http://php.net/manual/en/function.htmlentities.php
2162
   *
2163
   * @param string $str           <p>
2164
   *                              The input string.
2165
   *                              </p>
2166
   * @param int    $flags         [optional] <p>
2167
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2168
   *                              invalid code unit sequences and the used document type. The default is
2169
   *                              ENT_COMPAT | ENT_HTML401.
2170
   *                              <table>
2171
   *                              Available <i>flags</i> constants
2172
   *                              <tr valign="top">
2173
   *                              <td>Constant Name</td>
2174
   *                              <td>Description</td>
2175
   *                              </tr>
2176
   *                              <tr valign="top">
2177
   *                              <td><b>ENT_COMPAT</b></td>
2178
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2179
   *                              </tr>
2180
   *                              <tr valign="top">
2181
   *                              <td><b>ENT_QUOTES</b></td>
2182
   *                              <td>Will convert both double and single quotes.</td>
2183
   *                              </tr>
2184
   *                              <tr valign="top">
2185
   *                              <td><b>ENT_NOQUOTES</b></td>
2186
   *                              <td>Will leave both double and single quotes unconverted.</td>
2187
   *                              </tr>
2188
   *                              <tr valign="top">
2189
   *                              <td><b>ENT_IGNORE</b></td>
2190
   *                              <td>
2191
   *                              Silently discard invalid code unit sequences instead of returning
2192
   *                              an empty string. Using this flag is discouraged as it
2193
   *                              may have security implications.
2194
   *                              </td>
2195
   *                              </tr>
2196
   *                              <tr valign="top">
2197
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2198
   *                              <td>
2199
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2200
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2201
   *                              </td>
2202
   *                              </tr>
2203
   *                              <tr valign="top">
2204
   *                              <td><b>ENT_DISALLOWED</b></td>
2205
   *                              <td>
2206
   *                              Replace invalid code points for the given document type with a
2207
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2208
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2209
   *                              instance, to ensure the well-formedness of XML documents with
2210
   *                              embedded external content.
2211
   *                              </td>
2212
   *                              </tr>
2213
   *                              <tr valign="top">
2214
   *                              <td><b>ENT_HTML401</b></td>
2215
   *                              <td>
2216
   *                              Handle code as HTML 4.01.
2217
   *                              </td>
2218
   *                              </tr>
2219
   *                              <tr valign="top">
2220
   *                              <td><b>ENT_XML1</b></td>
2221
   *                              <td>
2222
   *                              Handle code as XML 1.
2223
   *                              </td>
2224
   *                              </tr>
2225
   *                              <tr valign="top">
2226
   *                              <td><b>ENT_XHTML</b></td>
2227
   *                              <td>
2228
   *                              Handle code as XHTML.
2229
   *                              </td>
2230
   *                              </tr>
2231
   *                              <tr valign="top">
2232 1
   *                              <td><b>ENT_HTML5</b></td>
2233
   *                              <td>
2234 1
   *                              Handle code as HTML 5.
2235
   *                              </td>
2236
   *                              </tr>
2237
   *                              </table>
2238 1
   *                              </p>
2239
   * @param string $encoding      [optional] <p>
2240
   *                              Like <b>htmlspecialchars</b>,
2241
   *                              <b>htmlentities</b> takes an optional third argument
2242
   *                              <i>encoding</i> which defines encoding used in
2243
   *                              conversion.
2244
   *                              Although this argument is technically optional, you are highly
2245
   *                              encouraged to specify the correct value for your code.
2246 1
   *                              </p>
2247
   * @param bool   $double_encode [optional] <p>
2248 1
   *                              When <i>double_encode</i> is turned off PHP will not
2249
   *                              encode existing html entities. The default is to convert everything.
2250
   *                              </p>
2251
   *
2252
   *
2253
   * @return string the encoded string.
2254
   * </p>
2255
   * <p>
2256
   * If the input <i>string</i> contains an invalid code unit
2257
   * sequence within the given <i>encoding</i> an empty string
2258
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2259
   * <b>ENT_SUBSTITUTE</b> flags are set.
2260
   */
2261 3
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every character that occupies three or more bytes and replace
    // it by its numbered entity from self::html_encode().
    $byteLengths = self::chr_size_list($str);
    $search = array();
    $replacements = array();
    foreach ($byteLengths as $counter => $byteLength) {
      if ($byteLength >= 3) {
        $char = self::access($str, $counter);

        // self::access() may signal a failure with false; such a value must
        // neither be used as an array key nor be passed to html_encode().
        if ($char === false) {
          continue;
        }

        if (!isset($replacements[$char])) {
          $search[$char] = $char;
          $replacements[$char] = self::html_encode($char);
        }
      }
    }

    return str_replace($search, $replacements, $str);
  }
2289 2
2290
  /**
2291 2
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2292
   *
2293
   * INFO: Take a look at "UTF8::htmlentities()"
2294
   *
2295
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2296
   *
2297
   * @param string $str           <p>
2298
   *                              The string being converted.
2299
   *                              </p>
2300
   * @param int    $flags         [optional] <p>
2301
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2302
   *                              invalid code unit sequences and the used document type. The default is
2303 2
   *                              ENT_COMPAT | ENT_HTML401.
2304
   *                              <table>
2305 2
   *                              Available <i>flags</i> constants
2306
   *                              <tr valign="top">
2307
   *                              <td>Constant Name</td>
2308
   *                              <td>Description</td>
2309
   *                              </tr>
2310
   *                              <tr valign="top">
2311
   *                              <td><b>ENT_COMPAT</b></td>
2312
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2313
   *                              </tr>
2314
   *                              <tr valign="top">
2315
   *                              <td><b>ENT_QUOTES</b></td>
2316
   *                              <td>Will convert both double and single quotes.</td>
2317 1
   *                              </tr>
2318
   *                              <tr valign="top">
2319 1
   *                              <td><b>ENT_NOQUOTES</b></td>
2320
   *                              <td>Will leave both double and single quotes unconverted.</td>
2321
   *                              </tr>
2322
   *                              <tr valign="top">
2323
   *                              <td><b>ENT_IGNORE</b></td>
2324
   *                              <td>
2325
   *                              Silently discard invalid code unit sequences instead of returning
2326
   *                              an empty string. Using this flag is discouraged as it
2327
   *                              may have security implications.
2328
   *                              </td>
2329
   *                              </tr>
2330
   *                              <tr valign="top">
2331
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2332
   *                              <td>
2333
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2334
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2335
   *                              </td>
2336
   *                              </tr>
2337
   *                              <tr valign="top">
2338
   *                              <td><b>ENT_DISALLOWED</b></td>
2339
   *                              <td>
2340
   *                              Replace invalid code points for the given document type with a
2341
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2342
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2343
   *                              instance, to ensure the well-formedness of XML documents with
2344
   *                              embedded external content.
2345
   *                              </td>
2346
   *                              </tr>
2347
   *                              <tr valign="top">
2348
   *                              <td><b>ENT_HTML401</b></td>
2349
   *                              <td>
2350
   *                              Handle code as HTML 4.01.
2351
   *                              </td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_XML1</b></td>
2355
   *                              <td>
2356
   *                              Handle code as XML 1.
2357
   *                              </td>
2358
   *                              </tr>
2359 1
   *                              <tr valign="top">
2360
   *                              <td><b>ENT_XHTML</b></td>
2361 1
   *                              <td>
2362
   *                              Handle code as XHTML.
2363
   *                              </td>
2364
   *                              </tr>
2365
   *                              <tr valign="top">
2366
   *                              <td><b>ENT_HTML5</b></td>
2367
   *                              <td>
2368
   *                              Handle code as HTML 5.
2369
   *                              </td>
2370
   *                              </tr>
2371
   *                              </table>
2372
   *                              </p>
2373
   * @param string $encoding      [optional] <p>
2374
   *                              Defines encoding used in conversion.
2375
   *                              </p>
2376
   *                              <p>
2377
   *                              For the purposes of this function, the encodings
2378
   *                              ISO-8859-1, ISO-8859-15,
2379
   *                              UTF-8, cp866,
2380
   *                              cp1251, cp1252, and
2381
   *                              KOI8-R are effectively equivalent, provided the
2382
   *                              <i>string</i> itself is valid for the encoding, as
2383
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2384
   *                              the same positions in all of these encodings.
2385
   *                              </p>
2386
   * @param bool   $double_encode [optional] <p>
2387 1
   *                              When <i>double_encode</i> is turned off PHP will not
2388
   *                              encode existing html entities, the default is to convert everything.
2389 1
   *                              </p>
2390
   *
2391
   * @return string The converted string.
2392
   * </p>
2393
   * <p>
2394
   * If the input <i>string</i> contains an invalid code unit
2395
   * sequence within the given <i>encoding</i> an empty string
2396
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2397
   * <b>ENT_SUBSTITUTE</b> flags are set.
2398
   */
2399
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The canonical 'UTF-8' spelling is passed through untouched; every other
    // value is run through normalize_encoding() before the native call.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2407
2408
  /**
2409
   * Checks whether iconv is available on the server.
2410
   *
2411
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2412
   */
2413
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean.
    $return = extension_loaded('iconv');

    // When Bootup::is_php('5.6') holds, "default_charset" is already set by the
    // "Bootup"-class and nothing else is needed. Otherwise the three iconv
    // encodings are forced to UTF-8 - but only if iconv_set_encoding() exists,
    // so a missing iconv implementation no longer triggers a fatal error.
    if (!Bootup::is_php('5.6') && function_exists('iconv_set_encoding')) {
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2427
2428
  /**
2429
   * Converts Integer to hexadecimal U+xxxx code point representation.
2430
   *
2431 28
   * INFO: opposite to UTF8::hex_to_int()
2432
   *
2433 28
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2434
   * @param string $pfix [optional]
2435 28
   *
2436 5
   * @return string <p>The code point, or empty string on failure.</p>
2437
   */
2438
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only a plain run of decimal digits is accepted; everything else
    // (negative numbers, floats, empty string, ...) yields the empty string.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values stay as they are.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2450
2451 1
  /**
2452
   * Checks whether intl-char is available on the server.
2453 1
   *
2454 1
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2455
   */
2456
  public static function intlChar_loaded()
  {
    // Both conditions must hold; the class lookup is skipped when the
    // version check does not return true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2460 1
2461
  /**
2462
   * Checks whether intl is available on the server.
2463
   *
2464
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2465
   */
2466
  public static function intl_loaded()
  {
    // extension_loaded() already returns a boolean, so it is returned directly.
    return extension_loaded('intl');
  }
2470
2471 16
  /**
2472
   * alias for "UTF8::is_ascii()"
2473
   *
2474 16
   * @see UTF8::is_ascii()
2475
   *
2476
   * @param string $str
2477 16
   *
2478
   * @return boolean
2479 16
   */
2480 16
  public static function isAscii($str)
  {
    // camelCase alias: the argument is handed straight to is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2484
2485 15
  /**
2486
   * alias for "UTF8::is_base64()"
2487
   *
2488
   * @see UTF8::is_base64()
2489
   *
2490
   * @param string $str
2491
   *
2492
   * @return bool
2493
   */
2494
  public static function isBase64($str)
  {
    // camelCase alias: the argument is handed straight to is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2498
2499
  /**
2500
   * alias for "UTF8::is_binary()"
2501
   *
2502
   * @see UTF8::is_binary()
2503
   *
2504
   * @param string $str
2505
   *
2506
   * @return bool
2507
   */
2508
  public static function isBinary($str)
  {
    // camelCase alias: the argument is handed straight to is_binary().
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
2512
2513
  /**
2514
   * alias for "UTF8::is_bom()"
2515
   *
2516
   * @see UTF8::is_bom()
2517
   *
2518
   * @param string $utf8_chr
2519
   *
2520
   * @return boolean
2521
   */
2522
  public static function isBom($utf8_chr)
  {
    // camelCase alias: the argument is handed straight to is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2526
2527
  /**
2528
   * alias for "UTF8::is_html()"
2529
   *
2530
   * @see UTF8::is_html()
2531
   *
2532
   * @param string $str
2533
   *
2534
   * @return boolean
2535
   */
2536 1
  public static function isHtml($str)
  {
    // camelCase alias: the argument is handed straight to is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2540 1
2541
  /**
2542
   * alias for "UTF8::is_json()"
2543
   *
2544
   * @see UTF8::is_json()
2545 1
   *
2546
   * @param string $str
2547 1
   *
2548
   * @return bool
2549 1
   */
2550 1
  public static function isJson($str)
  {
    // camelCase alias: the argument is handed straight to is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
2554
2555
  /**
2556
   * alias for "UTF8::is_utf16()"
2557
   *
2558
   * @see UTF8::is_utf16()
2559
   *
2560
   * @param string $str
2561
   *
2562
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2563 1
   */
2564
  public static function isUtf16($str)
  {
    // camelCase alias: the argument is handed straight to is_utf16(),
    // whose int|false result is returned unchanged.
    $detected = self::is_utf16($str);

    return $detected;
  }
2568
2569
  /**
2570
   * alias for "UTF8::is_utf32()"
2571
   *
2572 1
   * @see UTF8::is_utf32()
2573 1
   *
2574 1
   * @param string $str
2575 1
   *
2576 1
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2577
   */
2578 1
  public static function isUtf32($str)
  {
    // camelCase alias: the argument is handed straight to is_utf32(),
    // whose int|false result is returned unchanged.
    $detected = self::is_utf32($str);

    return $detected;
  }
2582
2583
  /**
2584
   * alias for "UTF8::is_utf8()"
2585
   *
2586
   * @see UTF8::is_utf8()
2587
   *
2588
   * @param string $str
2589
   * @param bool   $strict
2590
   *
2591
   * @return bool
2592
   */
2593 4
  public static function isUtf8($str, $strict = false)
  {
    // camelCase alias: both arguments are handed straight to is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2597 4
2598
  /**
2599 4
   * Checks if a string is 7 bit ASCII.
2600 4
   *
2601 4
   * @param string $str <p>The string to check.</p>
2602 4
   *
2603 4
   * @return bool <p>
2604 4
   *              <strong>true</strong> if it is ASCII<br />
2605 4
   *              <strong>false</strong> otherwise
2606 4
   *              </p>
2607 4
   */
2608 2
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Any byte in the range 0x80-0xFF disqualifies the string.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
2618 4
2619 4
  /**
2620 4
   * Returns true if the string is base64 encoded, false otherwise.
2621 4
   *
2622 4
   * @param string $str <p>The input string.</p>
2623 3
   *
2624 3
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2625 4
   */
2626 4
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strict decode followed by re-encode must reproduce the input exactly.
    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
2640
2641
  /**
2642
   * Check if the input is binary (heuristic; this looks like a hack).
2643
   *
2644
   * @param mixed $input
2645
   *
2646
   * @return bool
2647
   */
2648
  public static function is_binary($input)
  {
    $testLength = strlen($input);

    // A value consisting solely of the characters "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // NOTE(review): this counts occurrences of the literal four-character
    // substring '^ -~', not of a character class. Each non-overlapping match
    // consumes four bytes, so the ratio cannot exceed 0.25 and this check can
    // never succeed - confirm the intended heuristic before changing it.
    if ($testLength && substr_count($input, '^ -~') / $testLength > 0.3) {
      return true;
    }

    return false;
  }
2665 3
2666 3
  /**
2667 3
   * Check if the file is binary.
2668 1
   *
2669 1
   * @param string $file
2670 3
   *
2671 3
   * @return boolean
2672 3
   */
2673
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes of the file are inspected. Any failure while
    // opening or reading leaves $block empty.
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false; reading from it must be skipped.
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached e.g. when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Close the handle on every path, including after a failed read.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2685 3
2686 3
  /**
2687 3
   * Checks if the given string is equal to any "Byte Order Mark".
2688
   *
2689 3
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2690 1
   *
2691 1
   * @param string $str <p>The input string.</p>
2692
   *
2693 1
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2694
   */
2695
  public static function is_bom($str)
  {
    // The known BOM byte strings are the keys of self::$bom; the input must
    // be strictly identical (===) to one of them.
    return in_array($str, array_keys(self::$bom), true);
  }
2705
2706
  /**
2707
   * Check if the string contains any html-tags <lall>.
2708
   *
2709
   * @param string $str <p>The input string.</p>
2710
   *
2711
   * @return boolean
2712 43
   */
2713
  public static function is_html($str)
  {
    $str = (string)$str;

    // Nothing to find in an empty string.
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, optionally with attributes.
    $matches = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $matches);

    return !empty($matches);
  }
2732
2733
  /**
2734
   * Try to check if "$str" is a JSON string.
2735
   *
2736
   * @param string $str <p>The input string.</p>
2737
   *
2738
   * @return bool
2739
   */
2740 41
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is never reported as JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only input that decodes to an object without a decoder error counts;
    // anything that does not decode to an object is rejected by is_object().
    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
2758 34
2759 34
  /**
2760 34
   * Check if the string is UTF-16.
2761 39
   *
2762
   * @param string $str <p>The input string.</p>
2763 21
   *
2764 21
   * @return int|false <p>
2765 21
   *                   <strong>false</strong> if it is not UTF-16,<br />
2766 21
   *                   <strong>1</strong> for UTF-16LE,<br />
2767 33
   *                   <strong>2</strong> for UTF-16BE.
2768
   *                   </p>
2769 9
   */
2770 9 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // One score per candidate byte order; little-endian is evaluated first.
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // The converted text must survive a round trip through the candidate encoding.
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Score every key of the round-tripped character count that is found
      // (strictly) among the values of the input's character count.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means no decision.
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2818 33
2819 33
  /**
2820 33
   * Check if the string is UTF-32.
2821
   *
2822 33
   * @param string $str
2823
   *
2824 33
   * @return int|false <p>
2825 33
   *                   <strong>false</strong> if it is not UTF-32,<br />
2826 5
   *                   <strong>1</strong> for UTF-32LE,<br />
2827
   *                   <strong>2</strong> for UTF-32BE.
2828
   *                   </p>
2829 33
   */
2830 33 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // One score per candidate byte order; little-endian is evaluated first.
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // The converted text must survive a round trip through the candidate encoding.
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Score every key of the round-tripped character count that is found
      // (strictly) among the values of the input's character count.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means no decision.
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2878
2879
  /**
2880
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2881
   *
2882
   * @see    http://hsivonen.iki.fi/php-utf8/
2883 2
   *
2884
   * @param string $str    <p>The string to be checked.</p>
2885 2
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2886
   *
2887 2
   * @return bool
2888 2
   */
2889 2
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is considered valid.
    if (!isset($str[0])) {
      return true;
    }

    // In strict mode, input detected as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the /u shortcut is taken when pcre_utf8_support() is NOT
    // true, which looks inverted - confirm against that helper before changing.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expect either a US-ASCII character or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. Always illegal; rather than
          // resynchronising, carry on and let the end-of-sequence check
          // below reject it (4 < $mBytes).
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence: $mUcs4 holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // Input that ends in the middle of a multi-octet sequence (e.g. a lone
    // lead octet) is invalid, so the state must be back at 0 here.
    return ($mState === 0);
  }
3023
3024 2
  /**
3025
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3026 2
   * Decodes a JSON string
3027 2
   *
3028 2
   * @link http://php.net/manual/en/function.json-decode.php
3029
   *
3030 2
   * @param string $json    <p>
3031
   *                        The <i>json</i> string being decoded.
3032
   *                        </p>
3033
   *                        <p>
3034
   *                        This function only works with UTF-8 encoded strings.
3035
   *                        </p>
3036
   *                        <p>PHP implements a superset of
3037
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3038
   *                        only supports these values when they are nested inside an array or an object.
3039
   *                        </p>
3040 1
   * @param bool   $assoc   [optional] <p>
3041
   *                        When <b>TRUE</b>, returned objects will be converted into
3042 1
   *                        associative arrays.
3043
   *                        </p>
3044
   * @param int    $depth   [optional] <p>
3045
   *                        User specified recursion depth.
3046 1
   *                        </p>
3047
   * @param int    $options [optional] <p>
3048
   *                        Bitmask of JSON decode options. Currently only
3049
   *                        <b>JSON_BIGINT_AS_STRING</b>
3050
   *                        is supported (default is to cast large integers as floats)
3051
   *                        </p>
3052
   *
3053
   * @return mixed the value encoded in <i>json</i> in appropriate
3054
   * PHP type. Values true, false and
3055
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3056
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3057
   * <i>json</i> cannot be decoded or if the encoded
3058 1
   * data is deeper than the recursion limit.
3059
   */
3060 1
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // The $options bitmask is forwarded only when Bootup::is_php('5.4') is true;
    // otherwise the three-argument form of the native function is used.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3072 16
3073
  /**
   * Returns the JSON representation of a value (wrapper around the native "json_encode()").
   *
   * INFO: the value is passed through "UTF8::filter()" before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>
   *                       PHP implements a superset of JSON - it will also encode and decode scalar
   *                       types and <b>NULL</b>. The JSON standard only supports these values when
   *                       they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       The behaviour of these constants is described on the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       It is only forwarded when "Bootup::is_php('5.5')" is truthy.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // older PHP versions do not know the third ("$depth") argument
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3121
3122
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string, or an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // nothing to convert: skip the substr() / strtolower() round trip
    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() can also return false, but UTF8::strtolower() expects a string
    $firstChar = (string)self::substr($str, 0, 1);

    return self::strtolower($firstChar) . self::substr($str, 1);
  }
3133 1
3134 1
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (INF) or empty, the
   *                      Unicode separator (\pZ) and "other" (\pC) categories are stripped.
   *                      The string "0" is a valid character list and strips leading zeros.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, but it is a legitimate character to strip,
    // so it must not be mistaken for "no chars given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3157 18
3158
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native max() warns (PHP < 8) or throws a ValueError (PHP >= 8) on an empty array
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3173
3174 18
  /**
   * Determines the largest byte length of any single UTF-8 encoded
   * character contained in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if there are no chars.</p>
   */
  public static function max_chr_width($str)
  {
    $widths = self::chr_size_list($str);

    // max() must not be called with an empty list
    return count($widths) > 0 ? (int)max($widths) : 0;
  }
3191
3192
  /**
   * Checks whether mbstring is available on the server.
   *
   * INFO: as a side effect the internal mbstring encoding is set to "UTF-8" if the extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3207
3208
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native min() warns (PHP < 8) or throws a ValueError (PHP >= 8) on an empty array
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3223
3224
  /**
   * alias for "UTF8::normalize_encoding()"
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string|false <p>Whatever "UTF8::normalize_encoding()" returns for the given name.</p>
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3237
3238
  /**
   * Normalize the encoding-"name" input.
   *
   * The name is upper-cased and, once stripped of everything except letters, digits and whitespace,
   * looked up in a table of known aliases. Results are memoized per input name.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br />
   *                      <strong>false</strong> if the given name is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    static $normalizedCache = array();

    if (!$encoding) {
      return false;
    }

    // already in canonical form => return it untouched
    if (
        'UTF-8' === $encoding
        ||
        in_array($encoding, self::$iconvEncoding, true)
    ) {
      return $encoding;
    }

    if (isset($normalizedCache[$encoding])) {
      return $normalizedCache[$encoding];
    }

    $upperCased = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown aliases fall back to the upper-cased input
    $normalized = empty($equivalences[$lookupKey]) ? $upperCased : $equivalences[$lookupKey];

    // the cache is keyed by the name exactly as it was passed in
    $normalizedCache[$encoding] = $normalized;

    return $normalized;
  }
3294
3295 1
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "$utf8MSWord" table found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // split the mapping table only once per request
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      $searchCache = array_keys(self::$utf8MSWord);
      $replaceCache = array_values(self::$utf8MSWord);
    }

    return str_replace($searchCache, $replaceCache, $str);
  }
3321
3322
  /**
   * Normalize the whitespace.
   *
   * Every entry of the "$whitespaceTable" is replaced by a plain space and, unless requested
   * otherwise, every entry of the "$bidiUniCodeControlsTable" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key must be derived from the very same strict check that decides whether the
    // "NO-BREAK SPACE" entry is dropped, otherwise a truthy non-bool value (e.g. 1) would store
    // the unfiltered table under the key that a later call with "true" reads from.
    $cacheKey = (int)($keepNonBreakingSpace === true);

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $whitespaceTable = self::$whitespaceTable;

      if ($keepNonBreakingSpace === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3368
3369
  /**
   * Format a number with grouped thousands.
   *
   * INFO: the native "number_format()" only handles multi-byte separators since PHP 5.4,
   *       on older versions the separators are injected afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The workaround is needed as soon as ONE separator is longer than a single byte,
    // and only on PHP versions whose native function would truncate it.
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() swaps both placeholders in a single pass, so a "," that is part
      // of "$dec_point" is not replaced again by "$thousands_sep"
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3406 1
3407
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>
   *                              Encoding of the given character, default is UTF-8.
   *                              An empty value (e.g. null) is treated like UTF-8.
   *                              </p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);

      // normalize_encoding() returns false for an empty encoding (e.g. the documented null),
      // which is not a valid source encoding for mb_convert_encoding() => keep the input as it is
      if ($encoding !== false) {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::ord($chr);
      // a falsy result falls through to the manual decoding below
      if ($tmpReturn) {
        return $tmpReturn;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    $chr_orig = $chr;
    // only the first four bytes are inspected; unpack() returns them 1-indexed
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $chr = unpack('C*', substr($chr, 0, 4));
    $code = $chr ? $chr[1] : 0;

    // lead byte of a 4-byte sequence
    if (0xF0 <= $code && isset($chr[4])) {
      return $cache[$chr_orig] = (($code - 0xF0) << 18) + (($chr[2] - 0x80) << 12) + (($chr[3] - 0x80) << 6) + $chr[4] - 0x80;
    }

    // lead byte of a 3-byte sequence
    if (0xE0 <= $code && isset($chr[3])) {
      return $cache[$chr_orig] = (($code - 0xE0) << 12) + (($chr[2] - 0x80) << 6) + $chr[3] - 0x80;
    }

    // lead byte of a 2-byte sequence
    if (0xC0 <= $code && isset($chr[2])) {
      return $cache[$chr_orig] = (($code - 0xC0) << 6) + $chr[2] - 0x80;
    }

    // everything else: the value of the first byte
    return $cache[$chr_orig] = $code;
  }
3467
3468
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never places variables in the current scope,
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str    <p>The input string, it is passed through "UTF8::clean()" first.</p>
   * @param array  $result <p>The result will be returned into this reference parameter.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or if $result is empty.</p>
   */
  public static function parse_str($str, &$result)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    $parsed = \mb_parse_str($cleaned, $result);

    return $parsed !== false && !empty($result);
  }
3493
3494
  /**
   * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier matches the empty string only if the modifier is accepted
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3504 52
3505
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digits are used as they are,
   * other hexadecimal strings go through "UTF8::hex_to_int()" and everything else through "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve the start first, then the end; stop at the first unusable boundary
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $codePoints = range($bounds[0], $bounds[1]);

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        $codePoints
    );
  }
3551
3552
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html entities, the loop below decodes them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // repeat the decode pass until it does not change the string anymore (or once, without "$multi_decode")
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3602 36
3603
  /**
   * alias for "UTF8::remove_bom()"
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3616
3617
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the "$bom" table (BOM => byte length) that the string starts with is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // on older PHP versions substr() returns false if nothing but the BOM is left,
        // the cast keeps the documented string return type
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3634
3635
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // "$1" is the last matched repetition and therefore the literal item. Passing "$item"
        // itself as replacement would let preg_replace() interpret sequences like "$0" or "\1"
        // in it as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3658
3659
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>
   *                            Also remove the url encoded form ("%00" - "%1F" and "%7F",
   *                            with upper- or lowercase hex digits) of these characters.
   *                            </p>
   * @param string $replacement <p>The replacement for every removed character (sequence).</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" and "%0B" are the same byte)
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore, because a removal can join the
    // surrounding parts into a new match (e.g. "%%000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3692 12
3693 3
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>The replacement for every diamond question mark.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // both spellings of the character: as escaped bytes and as literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replace = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replace, $str);
  }
3715 6
3716 6
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (INF) or empty, the
   *                      Unicode separator (\pZ) and "other" (\pC) categories are stripped.
   *                      The string "0" is a valid character list and strips trailing zeros.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, but it is a legitimate character to strip,
    // so it must not be mistaken for "no chars given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3739
3740
  /**
   * Builds a regex fragment (without delimiters) that matches any one of the given characters.
   *
   * The result is memoized per "$s" + "$class" combination.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Optional content that is put at the beginning of the bracket expression.</p>
   *
   * @return string <p>A bracket expression, or a non-capturing alternation if more than one part is needed.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 collects the bracket expression, further entries become alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $chunk) {
      if ('-' === $chunk) {
        // a leading "-" is taken literally inside of a bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($chunk[2])) {
        // at most two bytes long => escape it
        $parts[0] .= preg_quote($chunk, '/');
        continue;
      }

      if (1 === self::strlen($chunk)) {
        // one character of three or more bytes => append it unescaped
        $parts[0] .= $chunk;
        continue;
      }

      // more than one character => separate alternative
      $parts[] = $chunk;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = 1 === count($parts) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3788
3789 14
  /**
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Prints every value of the "$support" table, each followed by "\n<br>".
   */
  public static function showSupport()
  {
    // fill the support table on first use
    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
3802
3803
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if (!isset($char[0])) {
      return '';
    }

    // ASCII input is returned as it is, if requested
    if ($keepAsciiChars === true && self::isAscii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
3835 1
3836
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a "/./us" match; otherwise a
   * byte-wise decoder collects well-formed 1-4 byte sequences and silently skips
   * lead bytes that are not followed by the expected continuation bytes.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string (PCRE path only).</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    $ret = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the byte stream by hand
      $byteCount = strlen($str);

      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // plain ASCII byte
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;
          continue;
        }

        // total sequence width as announced by the lead byte
        if (($lead & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          // stray continuation byte or invalid lead byte
          continue;
        }

        // truncated sequence at the end of the string
        if (!isset($str[$i + $width - 1])) {
          continue;
        }

        $sequence = $lead;
        $wellFormed = true;
        for ($k = 1; $k < $width; $k++) {
          if (($str[$i + $k] & "\xC0") !== "\x80") {
            $wellFormed = false;
            break;
          }
          $sequence .= $str[$i + $k];
        }

        if ($wellFormed) {
          $ret[] = $sequence;
          $i += $width - 1;
        }
      }
    }

    if ($length > 1) {
      $joined = array();
      foreach (array_chunk($ret, $length) as $chunk) {
        $joined[] = implode('', $chunk);
      }

      return $joined;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
3922
3923
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32 detection, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an "iconv()"
   * round-trip over the known iconv encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // run each detector only once and reuse its result
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // A string that survives a same-encoding round-trip unchanged is valid in that encoding.

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      // INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4015
4016 20
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not end with the needle.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // compare the needle against the last strlen($needle) characters of the haystack
    $needleLength = self::strlen($needle);

    return $needle === self::substr($haystack, -$needleLength);
  }
4039
4040
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not end with the needle.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    $tail = self::substr($haystack, -self::strlen($needle));

    // self::substr() can fail with false; treat that as "no match" instead of comparing it
    if ($tail === false) {
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
4063
4064
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Implemented on top of preg_replace(): every search value is quoted and matched
   * with the "ui" modifiers, and every replacement is escaped so that it is inserted
   * literally (sequences like "$0" or "\1" are NOT treated as back-references).
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must never match: this pattern is unsatisfiable
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() interprets "\" and "$" in the replacement, so escape both
    $escape = function ($replacement) {
      return strtr((string)$replacement, array('\\' => '\\\\', '$' => '\\$'));
    };

    if (is_array($replace)) {
      $replacements = array_map($escape, $replace);
    } else {
      $replacements = $escape($replace);
    }

    return preg_replace($patterns, $replacements, $subject, -1, $count);
  }
4107
4108 1
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not start with the needle.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // a match at position 0 means the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
4131 9
4132
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * Strings that already fit into $length are returned unchanged. Otherwise the
   * string is cut at $length, the trailing (possibly partial) word is dropped
   * and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Maximum number of characters to keep.</p>
   * @param string $strAddOn <p>Suffix appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the cut falls exactly on a space: drop the space, keep everything before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = self::substr($str, 0, $length);

    $words = explode(' ', $truncated);
    array_pop($words);
    $wholeWords = implode(' ', $words);

    if ($wholeWords !== '') {
      return $wholeWords . $strAddOn;
    }

    // a single word longer than the limit: hard cut
    return self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4172
4173
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, is not
   * positive, is smaller than the current length, or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === false
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with; also prevents a division by zero below
    if ($ps_length < 1) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the remainder
        $left = (int)floor($diff / 2);
        $right = $diff - $left;

        $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
        $pre = self::substr($pre, 0, $left);
        $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
        $post = self::substr($post, 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4227
4228
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);
    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4252 3
4253
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // delegate to the native function; $count is filled in by reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4286
4287 2
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split into characters via self::split(), so multi-byte
   * characters are moved as a whole.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4302
4303
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // flipping twice collapses duplicate values
    $codePoints = $unique ? array_flip(array_flip($codePoints)) : $codePoints;

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4328
4329
  /**
   * Split a string into an array of grapheme clusters, optionally grouped into chunks.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of grapheme clusters per array element; values below 1 are handed to the native str_split().</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    // let the native function deal with (and complain about) an invalid length
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue every $len consecutive clusters together
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4373
4374
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not start with the needle.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // a match at position 0 means the haystack begins with the needle
    return self::strpos($haystack, $needle) === 0;
  }
4397 61
4398 2
  /**
   * Get a binary representation of a specific string.
   *
   * Every byte is rendered as 8 bits; leading zero bits of the overall result are
   * stripped, and an input without any set bit (including the empty string) yields "0".
   * The conversion works byte by byte, so it stays exact for inputs of any length.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of "0" and "1" characters.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= str_pad(decbin(ord($str[$i])), 8, '0', STR_PAD_LEFT);
    }

    // same shape as a plain base conversion: no leading zeros, but never empty
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4413 2
4414
  /**
   * Alias for "UTF8::to_ascii()": all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4429
4430 1
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result alternates
   * between the text around the words (even indexes) and the words themselves
   * (odd indexes). A word is a run of letters (plus $charlist), optionally joined
   * by dashes or apostrophes.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist <p>Additional characters that count as part of a word.</p>
   *
   * @return array <p>array('') for empty input.</p>
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ('' === $str) {
      return array('');
    }

    $wordChar = self::rxClass($charlist, '\pL');
    $pattern = "/({$wordChar}+(?:[\p{Pd}’']{$wordChar}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4449 2
4450
  /**
   * Counts number of words in the UTF-8 string.
   *
   * Works on the output of self::str_to_words(), where the words sit at the odd indexes.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    // plain list of words
    if ($format === 1) {
      $words = array();
      $index = 1;
      while ($index < $partCount) {
        $words[] = $parts[$index];
        $index += 2;
      }

      return $words;
    }

    // words keyed by their character offset in the input
    if ($format === 2) {
      $words = array();
      $offset = self::strlen($parts[0]);
      $index = 1;
      while ($index < $partCount) {
        $words[$offset] = $parts[$index];
        $offset += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
        $index += 2;
      }

      return $words;
    }

    // every word is surrounded by two non-word parts, hence (n - 1) / 2 words
    return ($partCount - 1) / 2;
  }
4493
4494
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp() - both strings are case-folded before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4512 1
4513 1
  /**
   * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4530
4531
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are identical as-is compare equal immediately; otherwise both
   * are normalized to NFD and compared with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // fast path: identical strings need no normalization
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4551
4552
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start position; together with $length it selects the part of $str to examine.</p>
   * @param int    $length   [optional] <p>Maximum number of characters to examine.</p>
   *
   * @return int|null <p>Length of the segment, or null if $charList or the examined string is empty.</p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ('' === $charList) {
      return null;
    }

    // only cut the string if the caller asked for a sub-range
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ('' === $str) {
      return null;
    }

    // lazily capture everything in front of the first character from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
4584
4585
  /**
   * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4602
4603
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every element is converted with UTF8::chr() and the results are concatenated in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4625
4626 1
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The candidates are the keys of the class-level BOM table.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    $knownBoms = array_keys(self::$bom);

    foreach ($knownBoms as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4643 4
4644
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // repair broken UTF-8 first, then let the native function remove the tags
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4670
4671
  /**
4672
   * Finds position of first occurrence of a string within another, case insensitive.
4673
   *
4674
   * @link http://php.net/manual/en/function.mb-stripos.php
4675
   *
4676
   * @param string  $haystack  <p>
4677 1
   *                           The string from which to get the position of the first occurrence
4678
   *                           of needle
4679 1
   *                           </p>
4680 1
   * @param string  $needle    <p>
4681 1
   *                           The string to find in haystack
4682
   *                           </p>
4683 1
   * @param int     $offset    [optional] <p>
4684
   *                           The position in haystack
4685
   *                           to start searching
4686
   *                           </p>
4687
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4688
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4689
   *
4690 1
   * @return int|false <p>
4691
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
4692
   *                   or false if needle is not found.
4693
   *                   </p>
4694
   */
4695
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The default offset is null, but the native functions below expect an
    // integer (passing null to them is deprecated as of PHP 8.1), so the
    // offset is normalised exactly like in the sibling search methods.
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4738
4739 11
  /**
4740 2
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4741
   *
4742
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4743
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4744 10
   * @param bool    $before_needle [optional] <p>
4745 10
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4746
   *                               haystack before the first occurrence of the needle (excluding the needle).
4747
   *                               </p>
4748
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4749 10
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4750
   *
4751 10
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
4752
   */
4753
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in, or with, an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a non-default charset name has to be normalised
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn when a non-UTF-8 charset was requested but mbstring is unavailable
    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // regex fallback: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    return $before_needle ? $found[1] : self::substr($haystack, self::strlen($found[1]));
  }
4805
4806
  /**
4807
   * Get the string length, not the byte-length!
4808
   *
4809
   * @link     http://php.net/manual/en/function.mb-strlen.php
4810
   *
4811
   * @param string  $str       <p>The string being checked for length.</p>
4812
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4813 10
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4814
   *
4815
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4816 10
   *             character counted as +1)</p>
4817 10
   */
4818
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these charsets the byte length is returned directly
    // (loose comparison on purpose, it mirrors "switch" semantics)
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn when a non-UTF-8 charset was requested but mbstring is unavailable
    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strlen() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['iconv'] === true) {
      $iconvLength = \iconv_strlen($str, $encoding);
      if ($iconvLength !== false) {
        return $iconvLength;
      }
    }

    if (self::$support['intl'] === true) {
      $graphemeLength = \grapheme_strlen($str);
      if ($graphemeLength !== null) {
        return $graphemeLength;
      }
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $matchCount = count($chars[0]);
    if ($matchCount !== 0) {
      return $matchCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
4886 11
4887
  /**
4888 11
   * Case insensitive string comparisons using a "natural order" algorithm.
4889 1
   *
4890 1
   * INFO: natural order version of UTF8::strcasecmp()
4891 1
   *
4892
   * @param string $str1 <p>The first string.</p>
4893 11
   * @param string $str2 <p>The second string.</p>
4894
   *
4895 11
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
4896
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
4897 11
   *             <strong>0</strong> if they are equal
4898 1
   */
4899 1
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands, then delegate to the natural-order comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4903 11
4904
  /**
4905 11
   * String comparisons using a "natural order" algorithm
4906
   *
4907 11
   * INFO: natural order version of UTF8::strcmp()
4908
   *
4909
   * @link  http://php.net/manual/en/function.strnatcmp.php
4910
   *
4911
   * @param string $str1 <p>The first string.</p>
4912
   * @param string $str2 <p>The second string.</p>
4913
   *
4914
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
4915
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
4916
   *             <strong>0</strong> if they are equal
4917
   */
4918
  public static function strnatcmp($str1, $str2)
  {
    // identical strings are equal, no folding required
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    // compare the "natural order"-folded forms with the native function
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4922
4923
  /**
4924 21
   * Case-insensitive string comparison of the first n characters.
4925
   *
4926 21
   * @link  http://php.net/manual/en/function.strncasecmp.php
4927 6
   *
4928
   * @param string $str1 <p>The first string.</p>
4929
   * @param string $str2 <p>The second string.</p>
4930 19
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4931
   *
4932
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4933
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4934
   *             <strong>0</strong> if they are equal
4935
   */
4936 19
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands, then compare their first $len characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4940 19
4941
  /**
4942
   * String comparison of the first n characters.
4943
   *
4944
   * @link  http://php.net/manual/en/function.strncmp.php
4945
   *
4946
   * @param string $str1 <p>The first string.</p>
4947
   * @param string $str2 <p>The second string.</p>
4948
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4949
   *
4950 3
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4951
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4952 3
   *             <strong>0</strong> if they are equal
4953
   */
4954
  public static function strncmp($str1, $str2, $len)
  {
    // self::substr() may yield false instead of a string; normalise that
    // to an empty string so that self::strcmp() always receives strings.
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
4961
4962
  /**
4963
   * Search a string for any of a set of characters.
4964
   *
4965
   * @link  http://php.net/manual/en/function.strpbrk.php
4966 16
   *
4967
   * @param string $haystack  <p>The string where char_list is looked for.</p>
4968 16
   * @param string $char_list <p>This parameter is case sensitive.</p>
4969
   *
4970 16
   * @return string|false String starting from the character found, or false if it is not found.
4971 2
   */
4972
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in, or nothing to search for
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // match the first character that belongs to the character class built from $char_list
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    return substr($haystack, strpos($haystack, $hit[0]));
  }
4987
4988
  /**
4989
   * Find position of first occurrence of string in a string.
4990
   *
4991
   * @link http://php.net/manual/en/function.mb-strpos.php
4992
   *
4993
   * @param string  $haystack  <p>The string being checked.</p>
4994
   * @param string  $needle    <p>The string to find in haystack.</p>
4995
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
4996
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4997
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4998
   *
4999
   * @return int|false <p>
5000
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
5001 1
   *                   If needle is not found it returns false.
5002
   *                   </p>
5003 1
   */
5004 1
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a non-negative
    // integer is converted into its character first. This has to happen
    // before the string cast below, otherwise the strict type check can
    // never succeed.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // A negative offset counts from the end of the haystack. Translate it
      // into the equivalent offset from the start, so the returned position
      // stays relative to the whole haystack and not to the searched tail.
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    // self::substr() may yield false, so normalise to a string before searching
    $tail = (string)self::substr($haystack, $offset);

    $bytePos = strpos($tail, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position inside the tail into a character count
    return $offset + self::strlen(substr($tail, 0, $bytePos));
  }
5093
5094
  /**
5095
   * Finds the last occurrence of a character in a string within another.
5096 43
   *
5097 2
   * @link http://php.net/manual/en/function.mb-strrchr.php
5098 43
   *
5099 43
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5100 43
   * @param string $needle        <p>The string to find in haystack</p>
5101 1
   * @param bool   $before_needle [optional] <p>
5102
   *                              Determines which portion of haystack
5103
   *                              this function returns.
5104 43
   *                              If set to true, it returns all of haystack
5105 43
   *                              from the beginning to the last occurrence of needle.
5106
   *                              If set to false, it returns all of haystack
5107
   *                              from the last occurrence of needle to the end,
5108
   *                              </p>
5109
   * @param string $encoding      [optional] <p>
5110
   *                              Character encoding name to use.
5111
   *                              If it is omitted, internal character encoding is used.
5112
   *                              </p>
5113
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5114
   *
5115
   * @return string|false The portion of haystack or false if needle is not found.
5116
   */
5117 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default charset name has to be normalised
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5133
5134
  /**
5135 1
   * Reverses characters order in the string.
5136
   *
5137 1
   * @param string $str The input string
5138 1
   *
5139
   * @return string The string with characters in the reverse sequence
5140 1
   */
5141
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split into characters, flip their order and glue them back together
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5151
5152
  /**
5153
   * Finds the last occurrence of a character in a string within another, case insensitive.
5154
   *
5155
   * @link http://php.net/manual/en/function.mb-strrichr.php
5156
   *
5157
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5158
   * @param string  $needle        <p>The string to find in haystack.</p>
5159
   * @param bool    $before_needle [optional] <p>
5160
   *                               Determines which portion of haystack
5161 1
   *                               this function returns.
5162
   *                               If set to true, it returns all of haystack
5163 1
   *                               from the beginning to the last occurrence of needle.
5164 1
   *                               If set to false, it returns all of haystack
5165
   *                               from the last occurrence of needle to the end,
5166 1
   *                               </p>
5167 1
   * @param string  $encoding      [optional] <p>
5168
   *                               Character encoding name to use.
5169
   *                               If it is omitted, internal character encoding is used.
5170 1
   *                               </p>
5171 1
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5172 1
   *
5173
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
5174 1
   */
5175 1 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default charset name has to be normalised
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5190
5191
  /**
5192 1
   * Find position of last occurrence of a case-insensitive string.
5193
   *
5194
   * @param string  $haystack  <p>The string to look in.</p>
5195
   * @param string  $needle    <p>The string to look for.</p>
5196
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5197
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5198
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5199
   *
5200
   * @return int|false <p>
5201
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5202
   *                   not found, it returns false.
5203
   *                   </p>
5204
   */
5205
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into its character via self::chr()
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or an empty needle can never produce a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $mustClean = ($cleanUtf8 === true || $encoding === true);
    if ($mustClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn when a non-UTF-8 charset was requested but mbstring is unavailable
    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // fallback via vanilla php: upper-case both operands and run the
    // case-sensitive search on them
    $upperHaystack = self::strtoupper($haystack, $encoding);
    $upperNeedle = self::strtoupper($needle, $encoding);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5268 6
5269
  /**
5270
   * Find position of last occurrence of a string in a string.
5271
   *
5272
   * @link http://php.net/manual/en/function.mb-strrpos.php
5273
   *
5274
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5275
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5276
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5277
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5278
   *                              the end of the string.
5279
   *                              </p>
5280 1
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5281
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5282 1
   *
5283
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5284 1
   *                   is not found, it returns false.</p>
5285 1
   */
5286
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point and converted to its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // an empty haystack or an empty needle can never produce a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only honoured as a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a boolean "$encoding" is only accepted as a fallback for old versions
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding)) ? 'UTF-8' : self::normalize_encoding($encoding);

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring (a miss falls through to the next strategy)
    if (self::$support['mbstring'] === true) {
      $position = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // 2nd choice: intl (a miss falls through to the vanilla fallback)
    if (self::$support['intl'] === true) {
      $position = \grapheme_strrpos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // fallback via vanilla php: narrow the haystack according to the offset first
    if ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    } elseif ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    $bytePosition = strrpos($haystack, $needle);

    // translate the byte position into a character position
    return $bytePosition === false ? false : $offset + self::strlen(substr($haystack, 0, $bytePosition));
  }
5364
5365
  /**
5366
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5367
   * mask.
5368 1
   *
5369
   * @param string $str    <p>The input string.</p>
5370 1
   * @param string $mask   <p>The mask of chars</p>
5371
   * @param int    $offset [optional]
5372
   * @param int    $length [optional]
5373
   *
5374
   * @return int
5375
   */
5376
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    // cast the mask like the subject: "isset($mask[0])" is false for a non-string scalar
    // (e.g. the integer 1), which used to make the method return 0 for such masks
    $mask = (string)$mask;
    $length = (int)$length;
    $offset = (int)$offset;

    // only cut the string when an offset is given or the length differs from its default
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    // self::substr() may hand back false, the cast turns that into an empty string
    $str = (string)$str;
    if ($str === '' || $mask === '') {
      return 0;
    }

    // match the leading run of mask characters; keep the subject and the matches apart
    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5393 3
5394
  /**
5395
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5396 11
   *
5397
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5398
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5399 11
   * @param bool    $before_needle [optional] <p>
5400 7
   *                               If <b>TRUE</b>, strstr() returns the part of the
5401
   *                               haystack before the first occurrence of the needle (excluding the needle).
5402
   *                               </p>
5403 5
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5404 1
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5405
   *
5406
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5407
   */
5408 1
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never produce a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring (a miss falls through to the next strategy)
    if (self::$support['mbstring'] === true) {
      $found = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($found !== false) {
        return $found;
      }
    }

    // 2nd choice: intl (a miss falls through to the regex fallback)
    if (self::$support['intl'] === true) {
      $found = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($found !== false) {
        return $found;
      }
    }

    // fallback: lazily capture everything in front of the first occurrence of the needle
    $parts = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $parts);

    if (!isset($parts[1])) {
      return false;
    }

    // either the part before the needle, or everything from the needle onwards
    return $before_needle ? $parts[1] : self::substr($haystack, self::strlen($parts[1]));
  }
5466
5467 1
  /**
5468 2
   * Unicode transformation for case-less matching.
5469
   *
5470 5
   * @link http://unicode.org/reports/tr21/tr21-5.html
5471
   *
5472
   * @param string  $str       <p>The input string.</p>
5473
   * @param bool    $full      [optional] <p>
5474
   *                           <b>true</b>, replace full case folding chars (default)<br />
5475 5
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5476
   *                           </p>
5477
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5478
   *
5479
   * @return string
5480 5
   */
5481 5
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches: search / replace lists of the common table and the full folding data
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // build the search / replace lists from UTF8::$commonCaseFold only once
    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the full case folding data is loaded on first use
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // finally lowercase whatever the folding tables did not cover
    return self::strtolower($str);
  }
5518
5519 1
  /**
5520
   * Make a string lowercase.
5521
   *
5522 2
   * @link http://php.net/manual/en/function.mb-strtolower.php
5523
   *
5524 2
   * @param string  $str       <p>The string being lowercased.</p>
5525 1
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5526
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5527
   *
5528 2
   * @return string str with all alphabetic characters converted to lowercase.
5529
   */
5530 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars before the string is handed to "\mb_strtolower"
      $str = self::clean($str);
    }

    // every charset name other than the literal "UTF-8" is normalized first
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5551
5552
  /**
5553
   * Generic case sensitive transformation for collation matching.
5554
   *
5555
   * @param string $str <p>The input string</p>
5556
   *
5557
   * @return string
5558
   */
5559
  private static function strtonatfold($str)
  {
    // bring the string into normalization form D first
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // then remove every run of characters from the "\p{Mn}" class
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5564
5565
  /**
5566
   * Make a string uppercase.
5567
   *
5568 20
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5569
   *
5570 20
   * @param string  $str       <p>The string being uppercased.</p>
5571 2
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5572
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5573
   *
5574 2
   * @return string str with all alphabetic characters converted to uppercase.
5575 2
   */
5576 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean non UTF-8 chars before the string is handed to "\mb_strtoupper"
      $str = self::clean($str);
    }

    // every charset name other than the literal "UTF-8" is normalized first
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5596 19
5597
  /**
5598 19
   * Translate characters or replace sub-strings.
5599
   *
5600 16
   * @link  http://php.net/manual/en/function.strtr.php
5601 16
   *
5602 16
   * @param string          $str  <p>The string being translated.</p>
5603 16
   * @param string|string[] $from <p>The string replacing from.</p>
5604 5
   * @param string|string[] $to   <p>The string being translated to to.</p>
5605 5
   *
5606 5
   * @return string <p>
5607
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5608
   *                corresponding character in to.
5609 19
   *                </p>
5610
   */
5611 17
  public static function strtr($str, $from, $to = INF)
  {
    // INF marks "no $to given": $from is then passed on to strtr() unchanged,
    // which expects an array of replace pairs in its two-argument form
    if (INF !== $to) {
      // only plain strings are split into their characters; arrays (string[]) already
      // are lists of search / replacement strings and must not go through str_split()
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      // array_combine() needs the same number of elements on both sides,
      // so surplus entries of the longer list are dropped
      $pairCount = min(count($from), count($to));
      $from = array_slice($from, 0, $pairCount);
      $to = array_slice($to, 0, $pairCount);

      // map every search string to the replacement at the same position
      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5630
5631 9
  /**
5632 6
   * Return the width of a string.
5633 6
   *
5634 6
   * @param string  $str       <p>The input string.</p>
5635
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5636
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5637 19
   *
5638
   * @return int
5639 4
   */
5640 4
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // every charset name other than the literal "UTF-8" is normalized first
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($str, $charset);
  }
5656
5657 3
  /**
5658 19
   * Get part of a string.
5659
   *
5660 19
   * @link http://php.net/manual/en/function.mb-substr.php
5661
   *
5662
   * @param string  $str       <p>The string being checked.</p>
5663 19
   * @param int     $start     <p>The first position used in str.</p>
5664 19
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
5665 19
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5666 2
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5667 19
   *
5668
   * @return string <p>Returns a sub-string specified by the start and length parameters.</p>
5669 19
   */
5670
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    // the string length is only needed for the range check and for the default length
    $str_length = 0;
    if ($start || $length === null) {
      $str_length = (int)self::strlen($str);
    }

    // NOTE: a start position behind the end of the string yields false (not an empty string),
    //       so callers have to be prepared for a "string|false" result
    if ($start && $start > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$support['iconv'] === true
    ) {
      // pass the charset explicitly: without it "iconv_substr()" uses iconv's internal
      // encoding setting, which is not guaranteed to match the requested encoding
      return \iconv_substr($str, $start, $length, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $start, $length));
  }
5747 2
5748
  /**
5749
   * Binary safe comparison of two strings from an offset, up to length characters.
5750 7
   *
5751 7
   * @param string  $main_str           <p>The main string being compared.</p>
5752 7
   * @param string  $str                <p>The secondary string being compared.</p>
5753
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5754 7
   *                                    the end of the string.</p>
5755 1
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5756 1
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5757 7
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5758
   *                                    insensitive.</p>
5759
   *
5760 7
   * @return int
5761
   */
5762 7
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false when the start position lies behind the end of the string;
    // the casts turn that into an empty string, so that only strings reach the helpers below
    $main_str = (string)self::substr($main_str, $offset, $length);

    // the secondary string is cut down to the length of the extracted part of the main string
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5769
5770
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack  <p>The string to search in.</p>
   * @param string   $needle    <p>The substring to search for.</p>
   * @param int      $offset    [optional] <p>The offset where to start counting.</p>
   * @param int|null $length    [optional] <p>
   *                            The maximum length after the specified offset to search for the
   *                            substring. <strong>null</strong> (the default) means "no limit" and is
   *                            passed through to "UTF8::substr()" unchanged; an explicit 0 is honoured
   *                            as a zero-length window instead of being treated as "not given".
   *                            </p>
   * @param string   $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The number of occurrences, or false if the haystack or the needle is empty,
   *                   if the requested window is invalid on PHP &lt; 7.1, or if the window could
   *                   not be extracted from the haystack.
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare $length strictly against null: a loose check would silently ignore an explicit 0.
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // Keep "null" as is, so that "UTF8::substr()" applies its own "up to the end" default.
      if ($length !== null) {
        $length = (int)$length;
      }

      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      $haystack = self::substr($haystack, $offset, $length, $encoding);

      // "UTF8::substr()" can signal an error with false, there is nothing left to count then.
      if ($haystack === false) {
        return false;
      }
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the (UTF-8 aware) regex matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
5844
5845
  /**
   * Strips the prefix ($needle) from the start of the string ($haystack), compared case insensitive.
   *
   * @param string $haystack <p>The string the prefix is removed from.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip and an empty needle strips nothing
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $prefixLength = self::strlen($needle);

    return self::substr($haystack, $prefixLength);
  }
5873
5874
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), compared case insensitive.
   *
   * @param string $haystack <p>The string the suffix is removed from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip and an empty needle strips nothing
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
5902
5903
  /**
   * Strips the prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string the prefix is removed from.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip and an empty needle strips nothing
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $prefixLength = self::strlen($needle);

    return self::substr($haystack, $prefixLength);
  }
5931
5932
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to the length
   *                                          of the string; i.e. end the replacing at the end of string (this holds
   *                                          for a single string and for every element of an array of strings). Of
   *                                          course, if length is zero then this function will have the effect of
   *                                          inserting replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // nothing to do, and "array_fill()" does not accept a count of zero on older PHP versions
      if ($num === 0) {
        return array();
      }

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // non-integer offsets fall back to 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // "null" lets every recursive call apply the documented default (replace up to the end of the string),
        // a fill value of 0 would turn the call into an insert and disagree with the single-string behaviour
        $length = array_fill(0, $num, null);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single input string uses only the first replacement (whatever its key is)
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into arrays of UTF-8 characters, so that offsets count characters and not bytes
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6029
6030
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string the suffix is removed from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip and an empty needle strips nothing
    if ($haystack === '' || $needle === '') {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6057 1
6058
  /**
   * Swaps the case of every non-whitespace character of the string.
   *
   * A character that is unchanged by "UTF8::strtoupper()" is passed through "UTF8::strtolower()",
   * every other character is replaced by its upper case form.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before the UTF-8 aware regex below sees the string
      $str = self::clean($str);
    }

    $swapOneChar = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // already upper case (or caseless) => try the lower case form instead
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapOneChar, $str);
  }
6101
6102 6
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Forwarded as the "$unknown" replacement character.</p>
   * @param bool   $strict    [optional] <p>Forwarded as the "$strict" flag.</p>
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6117
6118
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6131
6132
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6145
6146 1
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6159
6160 1
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; if it is not pure ASCII afterwards, every remaining multi-byte
   * character is decoded to its code point and looked up in the per-"bank" (code point >> 8)
   * tables that are loaded on demand from the "data/x??.php" files next to this class.
   * Characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is true, but Intl is not available or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, false, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure, keep the input for the table lookup then
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // The lead byte announces the sequence length and carries the top bits of the code point.
      // $ord and $bytes are assigned on every pass, so no value of a previous character can leak in.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $bytes = 2;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $bytes = 3;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $bytes = 4;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $bytes = 5;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $bytes = 6;
        $ord = $ordC0 - 252;
      } else {
        // stray continuation byte (128 - 191) or a byte that never starts a sequence (254, 255)
        $c = $unknown;
        continue;
      }

      // truncated sequence: do not read string offsets that do not exist
      if (strlen($c) < $bytes) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes its lower 6 bits
      for ($i = 1; $i < $bytes; $i++) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }

        // NOTE(review): presumably the data file fills $UTF8_TO_ASCII[$bank] - make sure the lookup
        // below always gets an array, even if a file does not do that
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so that $c can not modify the last array element by accident
    unset($c);

    return implode('', $chars);
  }
6288
6289
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>The result of "UTF8::utf8_decode()" for every (non-empty) string.</p>
   */
  public static function to_iso8859($str)
  {
    if (!is_array($str)) {
      $input = (string)$str;

      return $input === '' ? '' : self::utf8_decode($input);
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
6318
6319
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
6332
6333
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes unicode escape sequences ("\uXXXX") and html-entities.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // look ahead, "\x00" stands for "no byte left" and is never a continuation byte
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        $c2IsTail = $c2 >= "\x80" && $c2 <= "\xbf";
        $c3IsTail = $c3 >= "\x80" && $c3 <= "\xbf";
        $c4IsTail = $c4 >= "\x80" && $c4 <= "\xbf";

        // number of continuation bytes to copy, 0 means "not valid UTF8 - convert it"
        $tail = 0;
        if ($c1 <= "\xdf") { // looks like 2 bytes UTF8
          if ($c2IsTail) {
            $tail = 1;
          }
        } elseif ($c1 <= "\xef") { // looks like 3 bytes UTF8
          if ($c2IsTail && $c3IsTail) {
            $tail = 2;
          }
        } elseif ($c1 <= "\xf7") { // looks like 4 bytes UTF8
          if ($c2IsTail && $c3IsTail && $c4IsTail) {
            $tail = 3;
          }
        }

        if ($tail > 0) { // yeah, almost sure it's UTF8 already
          $buf .= $c1 . $c2;
          if ($tail > 1) {
            $buf .= $c3;
          }
          if ($tail > 2) {
            $buf .= $c4;
          }
          $i += $tail;
        } else { // doesn't look like UTF8, but should be converted
          // ">> 6" is an integer division by 64, "chr()" must not receive a fractional float
          $cc1 = (chr(ord($c1) >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $cc1 = (chr($ordC1 >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    $buf = self::html_entity_decode($buf, ENT_QUOTES);

    return $buf;
  }
6456
6457
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function if the string / chars contain 7-Bit characters only,
   * but the check for ASCII (7-Bit) costs more time than we could save here.
   *
   * NOTE(review): a "falsy" $chars value (e.g. the string "0") is handled like "no chars given".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $hasCustomChars = $chars !== INF && $chars;

    if ($hasCustomChars) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // default: strip separators (\pZ) and control / other characters (\pC) from both ends
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6485
6486
  /**
   * Makes string's first char uppercase.
   *
   * The first character is cut off via "UTF8::substr()", converted with "UTF8::strtoupper()"
   * and put in front of the untouched rest of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    // a length of "null" means: everything after the first character
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return $firstCharUpper . $rest;
  }
6499
6500
  /**
   * alias for "UTF8::ucfirst()"
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Forwarded to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Forwarded to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6515
6516
  /**
   * Uppercase for all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty segment that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()" and the segments are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "0" is "falsy" in PHP, but it is still text that must not get lost,
    // so only non-numeric empty values ('', null, false, array()) end here
    if (!$str && !is_numeric($str)) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newwords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty segments only, a plain "!$word" would also drop the segment "0"
      if (!isset($word[0])) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6565
6566
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Examples (taken over from the original documentation):
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as numeric html-entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // one pass: to UTF-8 -> html-entities -> url-decoding -> simple UTF-8 fixes,
    // repeated until a pass does not change the string anymore (if $multi_decode is true)
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6616
6617
  /**
   * Returns an array that maps "urlencoded" Windows-1252 bytes ("%20" - "%FF")
   * to their UTF-8 characters.
   *
   * Bytes that are not assigned in Windows-1252 (%81, %8D, %8F, %90, %9D)
   * map to an empty string. Invisible characters (DEL, NO-BREAK SPACE,
   * SOFT HYPHEN) are written as escape sequences so that they cannot be lost
   * by editors or copy & paste.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array <p>Percent-encoded byte (upper-case hex) as key, UTF-8 string as value.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DELETE (control character)
        '%80' => '€', // EURO SIGN (was wrongly mapped to a backtick)
        '%81' => '', // not assigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // not assigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // not assigned in Windows-1252
        '%90' => '', // not assigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // not assigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6855
6856
  /**
   * Converts a UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through to_utf8(); afterwards every key of the
   * internal $utf8ToWin1252 table is replaced by its value before the actual
   * decoding is delegated to the Xml polyfill.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // search / replace lists derived from the table, built on first use only
    static $search = null;
    static $replace = null;

    $input = (string)$str;

    if ('' === $input) {
      return '';
    }

    $input = (string)self::to_utf8($input);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $input);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
6885
6886
  /**
   * Converts an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, every key of the internal $cp1252ToUtf8
   * table found in the result is replaced by its value.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // search / replace lists derived from the table, built on first use only
    static $search = null;
    static $replace = null;

    $input = (string)$str;

    if ('' === $input) {
      return '';
    }

    $encoded = \utf8_encode($input);

    // Without a 0xC2 byte the replacement step is skipped entirely.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6919
6920
  /**
   * Legacy alias that forwards to fix_simple_utf8().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever fix_simple_utf8() returns for the given input.</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6933
6934
  /**
   * Gives access to the internal table of UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               The whitespace characters as values, keyed by the type of whitespace
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6950
6951
  /**
   * Truncates a string after a maximum number of words.
   *
   * A "word" is a run of non-whitespace characters. If the string does not
   * exceed the limit it is returned unchanged; otherwise the kept part is
   * right-trimmed and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>String appended when the input had to be shortened.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ('' === $text) {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$limit "word + trailing whitespace" groups
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $text, $matches);

    // no match at all: hand the input back untouched
    if (!isset($matches[0])) {
      return $text;
    }

    $kept = $matches[0];

    // the match covers the whole string: nothing was cut off
    if (self::strlen($text) === self::strlen($kept)) {
      return $text;
    }

    return self::rtrim($kept) . $strAddOn;
  }
6986
6987
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte "mask" ('#' for an existing
   * break, ' ' for a space, '?' for any other character), the mask is wrapped
   * with PHP's native wordwrap(), and the break positions found in the wrapped
   * mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column,
   *                or an empty string if $str or $break is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ('' === $str || '' === $break) {
      return '';
    }

    // Build the character list and the parallel mask: one mask byte per entry.
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $index => $segment) {

      // every segment after the first one was preceded by a break
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;  // next entry of $chars that has not been emitted yet
    $maskIndex = -1; // last mask position that has been handled
    $breakPos = -1;  // position of the current '#' inside the mask

    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);

      if ($breakPos === false) {
        break;
      }

      // copy everything in front of the break position
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // A break or space at this position is consumed by the new break; any
      // other character (forced cut inside a long word) stays in the list.
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7054
7055
  /**
   * Gives access to the internal list of Unicode White Space characters.
   *
   * @return array <p>The White Space characters as values, keyed by their numeric code point.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7064
7065
}
7066