Completed
Push — master ( b336bb...0b5f4b )
by Lars
04:47
created

UTF8::access()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 2
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
   * Bom => Byte-Length
   *
   * Maps every recognised byte order mark to the number of bytes it occupies.
   * The 'as "WINDOWS-1252"' entries are the same marks after their raw bytes
   * were wrongly decoded as Windows-1252 and re-encoded as UTF-8, which is why
   * each of their characters takes two bytes.
   *
   * Keys are written as hex escapes so that invisible or easily mangled
   * characters cannot silently turn into an empty key.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $bom = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  private static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  private static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  private static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  private static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
   * Double-encoded ("broken") UTF-8 sequence => intended UTF-8 character.
   *
   * The first group maps C1 control code points (U+0080 - U+009F, encoded as
   * UTF-8) to the characters Windows-1252 assigns to those byte values.
   *
   * The second group maps classic mojibake: the UTF-8 bytes of a character
   * that were mis-decoded as Windows-1252 and encoded as UTF-8 again
   * (e.g. "\xc3\xbc" for "ü" becomes "\xc3\x83\xc2\xbc"). Those keys are
   * written as hex escapes so they survive editors and tools that try to
   * "repair" such text; an identity mapping here would be a no-op.
   *
   * @var array
   */
  private static $brokenUtf8ToUtf8 = array(
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
      // mojibake: "\xc3\x83" is "Ã" (lead byte 0xC3 read as Windows-1252),
      // followed by the Windows-1252 reading of the continuation byte
      "\xc3\x83\xc2\xbc"     => 'ü',
      "\xc3\x83\xc2\xa4"     => 'ä',
      "\xc3\x83\xc2\xb6"     => 'ö',
      "\xc3\x83\xe2\x80\x93" => 'Ö',
      "\xc3\x83\xc5\xb8"     => 'ß',
      "\xc3\x83\xc2\xa0"     => 'à',
      "\xc3\x83\xc2\xa1"     => 'á',
      "\xc3\x83\xc2\xa2"     => 'â',
      "\xc3\x83\xc2\xa3"     => 'ã',
      "\xc3\x83\xc2\xb9"     => 'ù',
      "\xc3\x83\xc2\xba"     => 'ú',
      "\xc3\x83\xc2\xbb"     => 'û',
      "\xc3\x83\xe2\x84\xa2" => 'Ù',
      "\xc3\x83\xc5\xa1"     => 'Ú',
      "\xc3\x83\xe2\x80\xba" => 'Û',
      "\xc3\x83\xc5\x93"     => 'Ü',
      "\xc3\x83\xc2\xb2"     => 'ò',
      "\xc3\x83\xc2\xb3"     => 'ó',
      "\xc3\x83\xc2\xb4"     => 'ô',
      "\xc3\x83\xc2\xa8"     => 'è',
      "\xc3\x83\xc2\xa9"     => 'é',
      "\xc3\x83\xc2\xaa"     => 'ê',
      "\xc3\x83\xc2\xab"     => 'ë',
      "\xc3\x83\xe2\x82\xac" => 'À',
      "\xc3\x83\xc2\x81"     => 'Á',
      "\xc3\x83\xe2\x80\x9a" => 'Â',
      "\xc3\x83\xc6\x92"     => 'Ã',
      "\xc3\x83\xe2\x80\x9e" => 'Ä',
      "\xc3\x83\xe2\x80\xa6" => 'Å',
      "\xc3\x83\xe2\x80\xa1" => 'Ç',
      "\xc3\x83\xcb\x86"     => 'È',
      "\xc3\x83\xe2\x80\xb0" => 'É',
      "\xc3\x83\xc5\xa0"     => 'Ê',
      "\xc3\x83\xe2\x80\xb9" => 'Ë',
      "\xc3\x83\xc5\x92"     => 'Ì',
      "\xc3\x83\xc2\x8d"     => 'Í',
      "\xc3\x83\xc5\xbd"     => 'Î',
      "\xc3\x83\xc2\x8f"     => 'Ï',
      "\xc3\x83\xe2\x80\x98" => 'Ñ',
      "\xc3\x83\xe2\x80\x99" => 'Ò',
      "\xc3\x83\xe2\x80\x9c" => 'Ó',
      "\xc3\x83\xe2\x80\x9d" => 'Ô',
      "\xc3\x83\xe2\x80\xa2" => 'Õ',
      "\xc3\x83\xcb\x9c"     => 'Ø',
      "\xc3\x83\xc2\xa5"     => 'å',
      "\xc3\x83\xc2\xa6"     => 'æ',
      "\xc3\x83\xc2\xa7"     => 'ç',
      "\xc3\x83\xc2\xac"     => 'ì',
      "\xc3\x83\xc2\xad"     => 'í',
      "\xc3\x83\xc2\xae"     => 'î',
      "\xc3\x83\xc2\xaf"     => 'ï',
      "\xc3\x83\xc2\xb0"     => 'ð',
      "\xc3\x83\xc2\xb1"     => 'ñ',
      "\xc3\x83\xc2\xb5"     => 'õ',
      "\xc3\x83\xc2\xb8"     => 'ø',
      "\xc3\x83\xc2\xbd"     => 'ý',
      "\xc3\x83\xc2\xbf"     => 'ÿ',
      // "\xe2\x82\xac" (EURO SIGN) mis-decoded byte by byte
      "\xc3\xa2\xe2\x80\x9a\xc2\xac" => '€',
  );
321
322
  /**
323
   * @var array
324
   */
325
  private static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  private static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  private static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792
      'ISO-IR-230',
793
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803
804
  /**
   * Instantiating the helper triggers the environment-support detection,
   * so the shared support table is filled before any method is used.
   */
  public function __construct()
  {
    // the check guards itself against running more than once
    self::checkForSupport();
  }
811
812
  /**
   * Fetch one character by position, the multi-byte counterpart of $str[$pos].
   *
   * The lookup is delegated to UTF8::substr() with a fixed length of one.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
824
825
  /**
   * Make sure a string starts with the UTF-8 byte order mark.
   *
   * INFO: A string that already carries a BOM (as reported by
   * UTF8::string_has_bom()) is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // only an explicit "false" from the check means the BOM is missing
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
842
843
  /**
   * Convert a string of binary digits into the bytes it represents.
   *
   * The digits are read as one big-endian number: leading zeros are ignored,
   * the remaining digits are left-padded to whole octets and every octet
   * becomes one byte. Working octet by octet keeps the conversion exact for
   * inputs of any length and keeps the nibbles aligned when the number of
   * digits is not a multiple of eight (e.g. "1" yields "\x01").
   *
   * Characters other than "0" and "1" are skipped. Input without any
   * significant digit (empty or zeros only) yields a single NUL byte.
   *
   * @param mixed $bin 1|0
   *
   * @return string
   */
  public static function binary_to_str($bin)
  {
    // keep only binary digits and drop leading zeros, they carry no value
    $bits = ltrim((string)preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      return "\x00";
    }

    // left-pad to a multiple of eight so the last octet is complete
    $padding = (8 - strlen($bits) % 8) % 8;
    $bits = str_repeat('0', $padding) . $bits;

    $str = '';
    foreach (str_split($bits, 8) as $octet) {
      $str .= chr(bindec($octet));
    }

    return $str;
  }
854
855
  /**
   * Provide the three-byte UTF-8 Byte Order Mark (EF BB BF).
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
864
865
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
878
879
  /**
   * Detect once which UTF-8 related features this server offers and store
   * the results in the shared support table.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the marker entry shows that detection already happened
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$support['already_checked_via_portable_utf8'] = true;

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
897
898
  /**
   * Build the character that belongs to a numeric code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For UTF-8 output the work is handed to IntlChar when the support table
   * reports it as available; otherwise the bytes are assembled by hand and
   * remembered in a per-request static cache. For any other encoding the
   * UTF-8 bytes are converted via mb_convert_encoding().
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure to encode.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // only genuine integers are accepted, numeric strings or floats are not
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if ($encoding === 'UTF-8') {
      if (self::$support['intlChar'] === true) {
        return \IntlChar::chr($code_point);
      }
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // use static cache, if there is no support for "IntlChar"
    static $memo = array();
    $memoKey = $code_point . $encoding;
    if (isset($memo[$memoKey])) {
      return $memo[$memoKey];
    }

    // wrap the value into the 21-bit range before encoding it
    $cp = $code_point % 0x200000;

    if ($cp < 0x80) {
      // one byte: 0xxxxxxx
      $bytes = chr($cp);
    } elseif ($cp < 0x800) {
      // two bytes: 110xxxxx 10xxxxxx
      $bytes = chr(0xC0 | ($cp >> 6))
               . chr(0x80 | ($cp & 0x3F));
    } elseif ($cp < 0x10000) {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $bytes = chr(0xE0 | ($cp >> 12))
               . chr(0x80 | (($cp >> 6) & 0x3F))
               . chr(0x80 | ($cp & 0x3F));
    } else {
      // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $bytes = chr(0xF0 | ($cp >> 18))
               . chr(0x80 | (($cp >> 12) & 0x3F))
               . chr(0x80 | (($cp >> 6) & 0x3F))
               . chr(0x80 | ($cp & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $bytes = \mb_convert_encoding($bytes, $encoding, 'UTF-8');
    }

    // add into static cache
    $memo[$memoKey] = $bytes;

    return $bytes;
  }
957
958
  /**
   * Run a callback over every character of a string.
   *
   * The string is broken up by UTF8::split() and each piece is passed to the
   * callback through array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
972
973
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * The string is split by UTF8::split() and strlen() is applied to every
   * resulting piece.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character; empty for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    // "0" is falsy in PHP but is a real one-character string, so it must
    // not be mistaken for empty input
    if (!$str && $str !== '0') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
993
994
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence (1 to 4);
   * its payload bits are then combined with the low bits of every following
   * byte, six bits at a time.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode: bail out instead of reading the undefined offset 0
    if ($char === '') {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    $bytes = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // NOTE(review): the loop trusts the lead byte; a truncated sequence still
    // reads offsets beyond the end of the string.
    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
1033
1034
  /**
1035
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1036
   *
1037
   * @param string $char <p>The input character</p>
1038
   * @param string $pfix [optional]
1039 1
   *
1040
   * @return string <p>The code point encoded as U+xxxx</p>
1041 1
   */
1042
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // Resolve the character to its numeric code first ...
    $codePoint = self::ord($char);

    // ... then let int_to_hex() render it with the requested prefix.
    return self::int_to_hex($codePoint, $pfix);
  }
1046
1047
  /**
1048
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1049
   *
1050
   * @param string $body     <p>The original string to be split.</p>
1051
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1052
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1053 1
   *
1054
   * @return string <p>The chunked string</p>
1055 1
   */
1056
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    // Cut the string into pieces of $chunklen via self::split().
    $chunks = self::split($body, $chunklen);

    // $end is used as the glue between the pieces, so it is not appended
    // after the final chunk.
    return implode($end, $chunks);
  }
1060
1061
  /**
1062
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1063
   *
1064
   * @param string $str                     <p>The string to be sanitized.</p>
1065
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1066
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1067
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1068
   *                                        => "..."</p>
1069
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1070
   *                                        $normalize_whitespace</p>
1071 44
   *
1072
   * @return string <p>Clean UTF-8 encoded string.</p>
1073
   */
1074
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences (at most 100 per match);
    // the two other groups match a single stray byte. Replacing every match
    // with group 1 keeps the well-formed runs and drops the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($utf8Pattern, '$1', $str);

    // Always applied: strip replacement characters and invisible characters.
    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // Optional steps, each enabled only by a strict boolean true.
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::removeBOM($cleaned);
    }

    return $cleaned;
  }
1109
1110
  /**
1111
   * Clean up a string: fix simple UTF-8 encoding errors and keep only printable UTF-8 chars.
1112
   *
1113
   * @param string $str <p>The input string.</p>
1114 4
   *
1115
   * @return string
1116 4
   */
1117
  public static function cleanup($str)
  {
    $input = (string)$str;

    // Nothing to clean in an empty string.
    if ($input === '') {
      return '';
    }

    // Repair simple ISO <-> UTF-8 encoding errors first.
    $repaired = self::fix_simple_utf8($input);

    // Then run clean() with: BOM removal on, whitespace normalization on,
    // MS Word normalization off, non-breaking spaces kept. clean() itself
    // also drops non-UTF-8 bytes, the diamond question mark and invisible
    // characters (e.g. "\0").
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
1137
1138
  /**
1139
   * Accepts a string or a array of strings and returns an array of Unicode code points.
1140
   *
1141
   * INFO: opposite to UTF8::string()
1142
   *
1143
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1144
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1145
   *                                    default, code points will be returned as integers.</p>
1146 5
   *
1147
   * @return array <p>The array of code points.</p>
1148 5
   */
1149 5
  public static function codepoints($arg, $u_style = false)
  {
    // A plain string is first broken into its characters; anything else is
    // passed on as the list of characters as-is.
    $chars = is_string($arg) ? self::split($arg) : $arg;

    // Map every entry to its code via self::ord().
    $points = array_map(array(__CLASS__, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // U+xxxx style requested: render each code with int_to_hex().
    return array_map(array(__CLASS__, 'int_to_hex'), $points);
  }
1175
1176
  /**
1177
   * Returns count of characters used in a string.
1178
   *
1179
   * @param string $str       <p>The input string.</p>
1180
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1181
   *
1182 6
   * @return array <p>An associative array of Character as keys and
1183
   *               their count as values.</p>
1184 6
   */
1185
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // One array entry per character (chunk length 1); $cleanUtf8 is handed
    // through to split().
    $chars = self::split($str, 1, $cleanUtf8);

    // Character => number of occurrences.
    return array_count_values($chars);
  }
1189
1190
  /**
1191
   * Get a UTF-8 character from its decimal code representation.
1192
   *
1193
   * @param int $code
1194 1
   *
1195
   * @return string
1196 1
   */
1197 1
  public static function decimal_to_chr($code)
  {
    // Express the code as a hexadecimal numeric character reference
    // (e.g. 8364 => "&#x20ac;") ...
    $entity = '&#x' . dechex($code) . ';';

    // ... and let mbstring decode that reference into UTF-8.
    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1205
1206
  /**
1207
   * Encode a string with a new charset-encoding.
1208
   *
1209
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1210
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
1211
   *
1212
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1213
   * @param string $str      <p>The input string</p>
1214
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1215
   *                         /> otherwise we auto-detect the current string-encoding</p>
1216 11
   *
1217
   * @return string
1218 11
   */
1219 11
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // An empty string or an empty target encoding leaves nothing to convert.
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encodingDetected = self::str_detect_encoding($str);

    // No usable detection result: hand the input back untouched.
    if (!$encodingDetected) {
      return $str;
    }

    // Without $force, a string already detected as the target encoding is
    // returned as-is.
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // Target UTF-8: use the dedicated converter when forced, or when the
    // source is one of the encodings listed below.
    if ($encoding === 'UTF-8') {
      $utf8Sources = array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1');

      if ($force === true || in_array($encodingDetected, $utf8Sources, true)) {
        return self::to_utf8($str);
      }
    }

    // Target ISO-8859-1: same idea with its dedicated converter.
    if ($encoding === 'ISO-8859-1') {
      $latinSources = array('ISO-8859-1', 'UTF-8');

      if ($force === true || in_array($encodingDetected, $latinSources, true)) {
        return self::to_iso8859($str);
      }
    }

    // Everything else goes through mbstring, from the detected encoding.
    $converted = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // A falsy conversion result falls back to the unconverted input.
    return $converted ? $converted : $str;
  }
1282
1283
  /**
1284
   * Reads entire file into a string.
1285
   *
1286
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1287
   *
1288
   * @link http://php.net/manual/en/function.file-get-contents.php
1289
   *
1290
   * @param string        $filename      <p>
1291
   *                                     Name of the file to read.
1292
   *                                     </p>
1293
   * @param int|null      $flags         [optional] <p>
1294
   *                                     Prior to PHP 6, this parameter is called
1295
   *                                     use_include_path and is a bool.
1296
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1297
   *                                     to trigger include path
1298
   *                                     search.
1299
   *                                     </p>
1300
   *                                     <p>
1301
   *                                     The value of flags can be any combination of
1302
   *                                     the following flags (with some restrictions), joined with the
1303
   *                                     binary OR (|)
1304
   *                                     operator.
1305
   *                                     </p>
1306
   *                                     <p>
1307
   *                                     <table>
1308
   *                                     Available flags
1309
   *                                     <tr valign="top">
1310
   *                                     <td>Flag</td>
1311
   *                                     <td>Description</td>
1312
   *                                     </tr>
1313
   *                                     <tr valign="top">
1314
   *                                     <td>
1315
   *                                     FILE_USE_INCLUDE_PATH
1316
   *                                     </td>
1317
   *                                     <td>
1318
   *                                     Search for filename in the include directory.
1319
   *                                     See include_path for more
1320
   *                                     information.
1321
   *                                     </td>
1322
   *                                     </tr>
1323
   *                                     <tr valign="top">
1324
   *                                     <td>
1325
   *                                     FILE_TEXT
1326
   *                                     </td>
1327
   *                                     <td>
1328
   *                                     As of PHP 6, the default encoding of the read
1329
   *                                     data is UTF-8. You can specify a different encoding by creating a
1330
   *                                     custom context or by changing the default using
1331
   *                                     stream_default_encoding. This flag cannot be
1332
   *                                     used with FILE_BINARY.
1333
   *                                     </td>
1334
   *                                     </tr>
1335
   *                                     <tr valign="top">
1336
   *                                     <td>
1337
   *                                     FILE_BINARY
1338
   *                                     </td>
1339
   *                                     <td>
1340
   *                                     With this flag, the file is read in binary mode. This is the default
1341
   *                                     setting and cannot be used with FILE_TEXT.
1342
   *                                     </td>
1343
   *                                     </tr>
1344
   *                                     </table>
1345
   *                                     </p>
1346
   * @param resource|null $context       [optional] <p>
1347
   *                                     A valid context resource created with
1348
   *                                     stream_context_create. If you don't need to use a
1349
   *                                     custom context, you can skip this parameter by &null;.
1350
   *                                     </p>
1351
   * @param int|null      $offset        [optional] <p>
1352
   *                                     The offset where the reading starts.
1353
   *                                     </p>
1354
   * @param int|null      $maxlen        [optional] <p>
1355
   *                                     Maximum length of data read. The default is to read until end
1356
   *                                     of file is reached.
1357
   *                                     </p>
1358
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1359
   *
1360
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1361
   *                                     or pdf, because they used non default utf-8 chars</p>
1362 2
   *
1363
   * @return string|false <p>The function returns the read data or false on failure.</p>
1364
   */
1365 2
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false when it cannot filter the input, and an
    // empty path can never be read: report both as a failed read here
    // instead of handing them to the native function.
    if ($filename === false || $filename === '') {
      return false;
    }

    // The native function expects a bool and an int here; normalize the
    // "not given" null defaults explicitly instead of relying on implicit
    // null coercion.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // Apply the timeout only when the caller did not bring a context.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // Only pass a length limit when one was really given.
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    // Optional post-processing: convert to UTF-8 (encoding auto-detected,
    // not forced) and clean the result. Not suitable for binary data.
    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1400
1401
  /**
1402
   * Checks if a file starts with BOM (Byte Order Mark) character.
1403
   *
1404
   * @param string $file_path <p>Path to a valid file.</p>
1405 1
   *
1406
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1407 1
   */
1408
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // An unreadable file has no detectable BOM; do not pass the boolean
    // failure value on as if it were file content.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1412
1413
  /**
1414
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1415
   *
1416
   * @param mixed  $var
1417
   * @param int    $normalization_form
1418
   * @param string $leading_combining
1419 9
   *
1420
   * @return mixed
1421 9
   */
1422 9
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively, in place.
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Any other non-string value is returned unchanged.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder value: marks "normalization state known" for the
      // isset() check below.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Without a usable normalizer result, fall back to re-encoding as UTF-8.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1474
1475
  /**
1476
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1477
   *
1478
   * Gets a specific external variable by name and optionally filters it
1479
   *
1480
   * @link  http://php.net/manual/en/function.filter-input.php
1481
   *
1482
   * @param int    $type          <p>
1483
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1484
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1485
   *                              <b>INPUT_ENV</b>.
1486
   *                              </p>
1487
   * @param string $variable_name <p>
1488
   *                              Name of a variable to get.
1489
   *                              </p>
1490
   * @param int    $filter        [optional] <p>
1491
   *                              The ID of the filter to apply. The
1492
   *                              manual page lists the available filters.
1493
   *                              </p>
1494
   * @param mixed  $options       [optional] <p>
1495
   *                              Associative array of options or bitwise disjunction of flags. If filter
1496
   *                              accepts options, flags can be provided in "flags" field of array.
1497
   *                              </p>
1498
   *
1499
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1500
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1501
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1502
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1503
   * @since 5.2.0
1504
   */
1505 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller actually
    // passed a fourth argument.
    $value = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // Run the result through self::filter().
    return self::filter($value);
  }
1515
1516
  /**
1517
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1518
   *
1519
   * Gets external variables and optionally filters them
1520 1
   *
1521
   * @link  http://php.net/manual/en/function.filter-input-array.php
1522 1
   *
1523 1
   * @param int   $type       <p>
1524 1
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1525 1
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1526
   *                          <b>INPUT_ENV</b>.
1527
   *                          </p>
1528 1
   * @param mixed $definition [optional] <p>
1529
   *                          An array defining the arguments. A valid key is a string
1530
   *                          containing a variable name and a valid value is either a filter type, or an array
1531
   *                          optionally specifying the filter, flags and options. If the value is an
1532
   *                          array, valid keys are filter which specifies the
1533
   *                          filter type,
1534
   *                          flags which specifies any flags that apply to the
1535
   *                          filter, and options which specifies any options that
1536
   *                          apply to the filter. See the example below for a better understanding.
1537
   *                          </p>
1538
   *                          <p>
1539
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1540 1
   *                          input array are filtered by this filter.
1541
   *                          </p>
1542 1
   * @param bool  $add_empty  [optional] <p>
1543 1
   *                          Add missing keys as <b>NULL</b> to the return value.
1544 1
   *                          </p>
1545 1
   *
1546
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1547
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1548 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1549
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1550
   * fails.
1551
   * @since 5.2.0
1552
   */
1553 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, call the native function without the optional
    // arguments; otherwise forward all three.
    $values = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // Run the result through self::filter().
    return self::filter($values);
  }
1563
1564
  /**
1565
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1566
   *
1567
   * Filters a variable with a specified filter
1568
   *
1569
   * @link  http://php.net/manual/en/function.filter-var.php
1570
   *
1571
   * @param mixed $variable <p>
1572
   *                        Value to filter.
1573
   *                        </p>
1574
   * @param int   $filter   [optional] <p>
1575
   *                        The ID of the filter to apply. The
1576
   *                        manual page lists the available filters.
1577 7
   *                        </p>
1578
   * @param mixed $options  [optional] <p>
1579 7
   *                        Associative array of options or bitwise disjunction of flags. If filter
1580 7
   *                        accepts options, flags can be provided in "flags" field of array. For
1581
   *                        the "callback" filter, callable type should be passed. The
1582 7
   *                        callback must accept one argument, the value to be filtered, and return
1583
   *                        the value after filtering/sanitizing it.
1584 7
   *                        </p>
1585 2
   *                        <p>
1586
   *                        <code>
1587
   *                        // for filters that accept options, use this format
1588 7
   *                        $options = array(
1589 1
   *                        'options' => array(
1590 1
   *                        'default' => 3, // value to return if the filter fails
1591 1
   *                        // other options here
1592
   *                        'min_range' => 0
1593 7
   *                        ),
1594
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1595
   *                        );
1596
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1597
   *                        // for filter that only accept flags, you can pass them directly
1598
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1599
   *                        // for filter that only accept flags, you can also pass as an array
1600
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1601
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1602
   *                        // callback validate filter
1603 1
   *                        function foo($value)
1604
   *                        {
1605 1
   *                        // Expected format: Surname, GivenNames
1606
   *                        if (strpos($value, ", ") === false) return false;
1607 1
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1608
   *                        $empty = (empty($surname) || empty($givennames));
1609
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1610 1
   *                        if ($empty || $notstrings) {
1611 1
   *                        return false;
1612
   *                        } else {
1613 1
   *                        return $value;
1614
   *                        }
1615
   *                        }
1616 1
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1617 1
   *                        </code>
1618 1
   *                        </p>
1619 1
   *
1620 1
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1621
   * @since 5.2.0
1622 1
   */
1623 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller actually
    // passed a third argument.
    $filtered = func_num_args() < 3
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Run the result through self::filter().
    return self::filter($filtered);
  }
1633
1634 1
  /**
   * Wrapper around "filter_var_array()" whose result is handed to "UTF8::filter()".
   *
   * INFO: according to the original documentation of this wrapper, the filtered values are
   *       normalized to UTF-8 NFC and converted from WINDOWS-1252 when needed.
   *
   * Gets multiple variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>An array with string keys containing the data to filter.</p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string containing a
   *                          variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter ("filter"), its flags ("flags") and
   *                          its options ("options").
   *                          </p>
   *                          <p>
   *                          This parameter can also be an integer holding a filter constant. Then all
   *                          values in the input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>Add missing keys as <b>NULL</b> to the return value.</p>
   *
   * @return mixed <p>
   *               An array containing the values of the requested variables on success, or <b>FALSE</b>
   *               on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   *               the variable is not set.
   *               </p>
   * @since 5.2.0
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Only forward the optional arguments when the caller really passed a definition,
    // so that the native function can fall back to its own defaults otherwise.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
1678
1679
  /**
   * Tell whether a string has at most "$box_size" characters.
   *
   * The length is measured with "UTF8::strlen()", not with the byte-based native "strlen()".
   *
   * @param string $str      <p>The string to measure.</p>
   * @param int    $box_size <p>The maximum number of characters allowed.</p>
   *
   * @return bool <p>true if the length of the string is less than or equal to $box_size, false otherwise.</p>
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1691
1692 1
  /**
   * Repair simple cases of broken UTF-8 via a fixed search-and-replace table.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252
   * as if it was ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of "self::$brokenUtf8ToUtf8" replaced by its value.</p>
   */
  public static function fix_simple_utf8($str)
  {
    // The lookup table is split into needles and replacements only once per request.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1724
1725
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed recursively, element by element, and keep their keys.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return mixed <p>The fixed string, or an array of fixed values if an array was given.</p>
   */
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      // Decode and re-encode until the value no longer changes.
      $previous = '';
      while ($previous !== $str) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($str));
      }

      return $str;
    }

    $fixed = array();
    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      $fixed[$key] = self::fix_utf8($value);
    }

    return $fixed;
  }
1754 1
1755 1
  /**
   * Get the text direction of a specific character.
   *
   * If the "IntlChar" class is flagged as available in "self::$support", its answer is used
   * whenever it is a clear left-to-right or right-to-left class. Otherwise (or if the class
   * reported by "IntlChar" is neither), a built-in table of right-to-left code point ranges
   * is consulted.
   *
   * @param string $char <p>The character to inspect.</p>
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::charDirection($char);

      // Numeric values of the "IntlChar::CHAR_DIRECTION_*" constants (ICU "UCharDirection"):
      //   RTL: 1 = RIGHT_TO_LEFT, 13 = RIGHT_TO_LEFT_ARABIC, 14 = RLE, 15 = RLO, 21 = RLI
      //   LTR: 0 = LEFT_TO_RIGHT, 11 = LRE, 12 = LRO, 20 = LRI
      $charDirection = array(
          'RTL' => array(1, 13, 14, 15, 21),
          'LTR' => array(0, 11, 12, 20),
      );

      if (in_array($tmpReturn, $charDirection['LTR'], true)) {
        return 'LTR';
      } elseif (in_array($tmpReturn, $charDirection['RTL'], true)) {
        return 'RTL';
      }

      // any other direction class (numbers, neutrals, ...) falls through to the table below
    }

    // "self::" instead of "static::": this class is final, so late static binding is pointless,
    // and every other internal call in this class uses "self::".
    // NOTE(review): presumably this is the Unicode code point of $char as int -- confirm in chr_to_decimal()
    $c = self::chr_to_decimal($char);

    // fast exit: everything outside of U+05BE .. U+10B7F is treated as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // right-to-left entries in the range U+05BE .. U+085E
      if (0x5be === $c ||
          0x5c0 === $c ||
          0x5c3 === $c ||
          0x5c6 === $c ||
          (0x5d0 <= $c && 0x5ea >= $c) ||
          (0x5f0 <= $c && 0x5f4 >= $c) ||
          0x608 === $c ||
          0x60b === $c ||
          0x60d === $c ||
          0x61b === $c ||
          (0x61e <= $c && 0x64a >= $c) ||
          (0x66d <= $c && 0x66f >= $c) ||
          (0x671 <= $c && 0x6d5 >= $c) ||
          (0x6e5 <= $c && 0x6e6 >= $c) ||
          (0x6ee <= $c && 0x6ef >= $c) ||
          (0x6fa <= $c && 0x70d >= $c) ||
          0x710 === $c ||
          (0x712 <= $c && 0x72f >= $c) ||
          (0x74d <= $c && 0x7a5 >= $c) ||
          0x7b1 === $c ||
          (0x7c0 <= $c && 0x7ea >= $c) ||
          (0x7f4 <= $c && 0x7f5 >= $c) ||
          0x7fa === $c ||
          (0x800 <= $c && 0x815 >= $c) ||
          0x81a === $c ||
          0x824 === $c ||
          0x828 === $c ||
          (0x830 <= $c && 0x83e >= $c) ||
          (0x840 <= $c && 0x858 >= $c) ||
          0x85e === $c
      ) {
        return 'RTL';
      }

    } elseif (0x200f === $c) {

      // U+200F is the RIGHT-TO-LEFT MARK
      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // right-to-left entries in the range U+FB1D .. U+10B7F
      if (0xfb1d === $c ||
          (0xfb1f <= $c && 0xfb28 >= $c) ||
          (0xfb2a <= $c && 0xfb36 >= $c) ||
          (0xfb38 <= $c && 0xfb3c >= $c) ||
          0xfb3e === $c ||
          (0xfb40 <= $c && 0xfb41 >= $c) ||
          (0xfb43 <= $c && 0xfb44 >= $c) ||
          (0xfb46 <= $c && 0xfbc1 >= $c) ||
          (0xfbd3 <= $c && 0xfd3d >= $c) ||
          (0xfd50 <= $c && 0xfd8f >= $c) ||
          (0xfd92 <= $c && 0xfdc7 >= $c) ||
          (0xfdf0 <= $c && 0xfdfc >= $c) ||
          (0xfe70 <= $c && 0xfe74 >= $c) ||
          (0xfe76 <= $c && 0xfefc >= $c) ||
          (0x10800 <= $c && 0x10805 >= $c) ||
          0x10808 === $c ||
          (0x1080a <= $c && 0x10835 >= $c) ||
          (0x10837 <= $c && 0x10838 >= $c) ||
          0x1083c === $c ||
          (0x1083f <= $c && 0x10855 >= $c) ||
          (0x10857 <= $c && 0x1085f >= $c) ||
          (0x10900 <= $c && 0x1091b >= $c) ||
          (0x10920 <= $c && 0x10939 >= $c) ||
          0x1093f === $c ||
          0x10a00 === $c ||
          (0x10a10 <= $c && 0x10a13 >= $c) ||
          (0x10a15 <= $c && 0x10a17 >= $c) ||
          (0x10a19 <= $c && 0x10a33 >= $c) ||
          (0x10a40 <= $c && 0x10a47 >= $c) ||
          (0x10a50 <= $c && 0x10a58 >= $c) ||
          (0x10a60 <= $c && 0x10a7f >= $c) ||
          (0x10b00 <= $c && 0x10b35 >= $c) ||
          (0x10b40 <= $c && 0x10b55 >= $c) ||
          (0x10b58 <= $c && 0x10b72 >= $c) ||
          (0x10b78 <= $c && 0x10b7f >= $c)
      ) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1874
1875
  /**
   * Load a data file from "/data/*.php" (relative to the directory of this class).
   *
   * The file is included via "require", so the result is whatever that file returns.
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false on error (the file does not exist).</p>
   */
  private static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1892
1893
  /**
   * Alias of "UTF8::string_has_bom()": forwards the string and returns that result unchanged.
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1906
1907
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted notations (case-insensitive): "U+xxxx", "\uxxxx" or the plain hex digits,
   * each with 4 to 6 hexadecimal digits.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $str <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($str)
  {
    // empty input (and every other falsy value) can never be a valid code point notation
    if (!$str) {
      return false;
    }

    // Only real hexadecimal digits are accepted: the former character class "[a-z0-9]" also
    // matched the letters g-z, so that e.g. "U+zzzz" was converted to 0 and "12g4" to 18
    // by intval() instead of being reported as a failure.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1928 9
1929
  /**
   * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Flags as accepted by UTF8::html_entity_decode().</p>
   * @param string $encoding [optional] <p>Encoding to use.</p>
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1944 9
1945 9
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * If "mb_encode_numericentity()" is available it does the work, otherwise every character
   * is encoded on its own via "UTF8::single_chr_html_encode()".
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: 0x80 leaves the 7-bit ASCII range untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding);
      }

      // convmap = (first code point, last code point, offset, bit mask)
      //
      // The range must reach up to U+10FFFF (the last Unicode code point): with the former
      // upper bound of 0xffff every character outside of the Basic Multilingual Plane
      // (e.g. emoji) was silently left unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff,),
          $encoding
      );
    }

    // fallback without "mbstring": encode character by character
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
1986 9
1987
  /**
1988 9
   * UTF-8 version of html_entity_decode()
1989
   *
1990
   * The reason we are not using html_entity_decode() by itself is because
1991
   * while it is not technically correct to leave out the semicolon
1992
   * at the end of an entity most browsers will still interpret the entity
1993
   * correctly. html_entity_decode() does not convert entities without
1994
   * semicolons, so we are left with our own little solution here. Bummer.
1995
   *
1996
   * Convert all HTML entities to their applicable characters
1997
   *
1998
   * INFO: opposite to UTF8::html_encode()
1999
   *
2000
   * @link http://php.net/manual/en/function.html-entity-decode.php
2001
   *
2002
   * @param string $str      <p>
2003
   *                         The input string.
2004
   *                         </p>
2005
   * @param int    $flags    [optional] <p>
2006
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2007
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2008
   *                         <table>
2009
   *                         Available <i>flags</i> constants
2010
   *                         <tr valign="top">
2011
   *                         <td>Constant Name</td>
2012
   *                         <td>Description</td>
2013
   *                         </tr>
2014
   *                         <tr valign="top">
2015
   *                         <td><b>ENT_COMPAT</b></td>
2016
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2017
   *                         </tr>
2018
   *                         <tr valign="top">
2019
   *                         <td><b>ENT_QUOTES</b></td>
2020
   *                         <td>Will convert both double and single quotes.</td>
2021
   *                         </tr>
2022
   *                         <tr valign="top">
2023
   *                         <td><b>ENT_NOQUOTES</b></td>
2024
   *                         <td>Will leave both double and single quotes unconverted.</td>
2025
   *                         </tr>
2026
   *                         <tr valign="top">
2027
   *                         <td><b>ENT_HTML401</b></td>
2028
   *                         <td>
2029
   *                         Handle code as HTML 4.01.
2030
   *                         </td>
2031
   *                         </tr>
2032
   *                         <tr valign="top">
2033
   *                         <td><b>ENT_XML1</b></td>
2034
   *                         <td>
2035
   *                         Handle code as XML 1.
2036
   *                         </td>
2037
   *                         </tr>
2038
   *                         <tr valign="top">
2039
   *                         <td><b>ENT_XHTML</b></td>
2040
   *                         <td>
2041
   *                         Handle code as XHTML.
2042
   *                         </td>
2043
   *                         </tr>
2044
   *                         <tr valign="top">
2045
   *                         <td><b>ENT_HTML5</b></td>
2046
   *                         <td>
2047
   *                         Handle code as HTML 5.
2048
   *                         </td>
2049
   *                         </tr>
2050
   *                         </table>
2051
   *                         </p>
2052
   * @param string $encoding [optional] <p>Encoding to use.</p>
2053
   *
2054
   * @return string <p>The decoded string.</p>
2055
   */
2056
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // strings with less than four bytes are returned untouched, e.g. "&;" or "&x;"
    if (!isset($str[3])) {
      return $str;
    }

    // without an ampersand there is nothing to decode
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity nor a terminated (named) entity
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      $flags = ENT_COMPAT;
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_COMPAT | ENT_HTML5;
      }
    }

    // Decodes one decimal entity, but keeps the entity as it is if the result would be a quote.
    $decodeDecimalEntity = function ($hit) use ($encoding) {
      $decoded = \mb_convert_encoding($hit[0], $encoding, 'HTML-ENTITIES');

      return ($decoded !== '"' && $decoded !== "'") ? $decoded : $hit[0];
    };

    // Repeat until a pass no longer changes the string, so that entities
    // which were encoded several times are decoded completely.
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // add the missing semicolon to numeric entities, then
      // decode numeric & UTF16 two byte entities
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2120 2
2121
  /**
2122
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2123
   *
2124
   * @link http://php.net/manual/en/function.htmlentities.php
2125
   *
2126
   * @param string $str           <p>
2127
   *                              The input string.
2128
   *                              </p>
2129
   * @param int    $flags         [optional] <p>
2130
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2131
   *                              invalid code unit sequences and the used document type. The default is
2132
   *                              ENT_COMPAT | ENT_HTML401.
2133
   *                              <table>
2134
   *                              Available <i>flags</i> constants
2135
   *                              <tr valign="top">
2136
   *                              <td>Constant Name</td>
2137
   *                              <td>Description</td>
2138
   *                              </tr>
2139
   *                              <tr valign="top">
2140
   *                              <td><b>ENT_COMPAT</b></td>
2141
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2142
   *                              </tr>
2143
   *                              <tr valign="top">
2144
   *                              <td><b>ENT_QUOTES</b></td>
2145
   *                              <td>Will convert both double and single quotes.</td>
2146
   *                              </tr>
2147
   *                              <tr valign="top">
2148
   *                              <td><b>ENT_NOQUOTES</b></td>
2149
   *                              <td>Will leave both double and single quotes unconverted.</td>
2150
   *                              </tr>
2151
   *                              <tr valign="top">
2152
   *                              <td><b>ENT_IGNORE</b></td>
2153
   *                              <td>
2154
   *                              Silently discard invalid code unit sequences instead of returning
2155
   *                              an empty string. Using this flag is discouraged as it
2156
   *                              may have security implications.
2157
   *                              </td>
2158
   *                              </tr>
2159
   *                              <tr valign="top">
2160
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2161
   *                              <td>
2162
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2163
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2164
   *                              </td>
2165
   *                              </tr>
2166
   *                              <tr valign="top">
2167
   *                              <td><b>ENT_DISALLOWED</b></td>
2168
   *                              <td>
2169
   *                              Replace invalid code points for the given document type with a
2170
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2171
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2172
   *                              instance, to ensure the well-formedness of XML documents with
2173
   *                              embedded external content.
2174
   *                              </td>
2175
   *                              </tr>
2176
   *                              <tr valign="top">
2177
   *                              <td><b>ENT_HTML401</b></td>
2178
   *                              <td>
2179
   *                              Handle code as HTML 4.01.
2180
   *                              </td>
2181
   *                              </tr>
2182
   *                              <tr valign="top">
2183
   *                              <td><b>ENT_XML1</b></td>
2184
   *                              <td>
2185
   *                              Handle code as XML 1.
2186
   *                              </td>
2187
   *                              </tr>
2188
   *                              <tr valign="top">
2189
   *                              <td><b>ENT_XHTML</b></td>
2190
   *                              <td>
2191
   *                              Handle code as XHTML.
2192
   *                              </td>
2193
   *                              </tr>
2194
   *                              <tr valign="top">
2195
   *                              <td><b>ENT_HTML5</b></td>
2196
   *                              <td>
2197
   *                              Handle code as HTML 5.
2198
   *                              </td>
2199
   *                              </tr>
2200
   *                              </table>
2201
   *                              </p>
2202
   * @param string $encoding      [optional] <p>
2203
   *                              Like <b>htmlspecialchars</b>,
2204
   *                              <b>htmlentities</b> takes an optional third argument
2205
   *                              <i>encoding</i> which defines encoding used in
2206
   *                              conversion.
2207
   *                              Although this argument is technically optional, you are highly
2208
   *                              encouraged to specify the correct value for your code.
2209
   *                              </p>
2210
   * @param bool   $double_encode [optional] <p>
2211
   *                              When <i>double_encode</i> is turned off PHP will not
2212
   *                              encode existing html entities. The default is to convert everything.
2213
   *                              </p>
2214
   *
2215
   *
2216
   * @return string the encoded string.
2217
   * </p>
2218
   * <p>
2219
   * If the input <i>string</i> contains an invalid code unit
2220
   * sequence within the given <i>encoding</i> an empty string
2221
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2222
   * <b>ENT_SUBSTITUTE</b> flags are set.
2223
   */
2224
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // for every other encoding the result of the native function is final
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Every remaining character of three or more bytes is replaced
    // by its numbered entity; each distinct character is encoded only once.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $str);
  }
2252
2253
  /**
2254
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2255
   *
2256
   * INFO: Take a look at "UTF8::htmlentities()"
2257
   *
2258
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2259
   *
2260
   * @param string $str           <p>
2261 3
   *                              The string being converted.
2262
   *                              </p>
2263 3
   * @param int    $flags         [optional] <p>
2264 3
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2265
   *                              invalid code unit sequences and the used document type. The default is
2266 3
   *                              ENT_COMPAT | ENT_HTML401.
2267
   *                              <table>
2268 3
   *                              Available <i>flags</i> constants
2269
   *                              <tr valign="top">
2270
   *                              <td>Constant Name</td>
2271
   *                              <td>Description</td>
2272
   *                              </tr>
2273
   *                              <tr valign="top">
2274
   *                              <td><b>ENT_COMPAT</b></td>
2275
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2276
   *                              </tr>
2277
   *                              <tr valign="top">
2278
   *                              <td><b>ENT_QUOTES</b></td>
2279 1
   *                              <td>Will convert both double and single quotes.</td>
2280
   *                              </tr>
2281 1
   *                              <tr valign="top">
2282
   *                              <td><b>ENT_NOQUOTES</b></td>
2283
   *                              <td>Will leave both double and single quotes unconverted.</td>
2284
   *                              </tr>
2285
   *                              <tr valign="top">
2286
   *                              <td><b>ENT_IGNORE</b></td>
2287
   *                              <td>
2288
   *                              Silently discard invalid code unit sequences instead of returning
2289 2
   *                              an empty string. Using this flag is discouraged as it
2290
   *                              may have security implications.
2291 2
   *                              </td>
2292
   *                              </tr>
2293
   *                              <tr valign="top">
2294
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2295
   *                              <td>
2296
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2297
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2298
   *                              </td>
2299
   *                              </tr>
2300
   *                              <tr valign="top">
2301
   *                              <td><b>ENT_DISALLOWED</b></td>
2302
   *                              <td>
2303 2
   *                              Replace invalid code points for the given document type with a
2304
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2305 2
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2306
   *                              instance, to ensure the well-formedness of XML documents with
2307
   *                              embedded external content.
2308
   *                              </td>
2309
   *                              </tr>
2310
   *                              <tr valign="top">
2311
   *                              <td><b>ENT_HTML401</b></td>
2312
   *                              <td>
2313
   *                              Handle code as HTML 4.01.
2314
   *                              </td>
2315
   *                              </tr>
2316
   *                              <tr valign="top">
2317 1
   *                              <td><b>ENT_XML1</b></td>
2318
   *                              <td>
2319 1
   *                              Handle code as XML 1.
2320
   *                              </td>
2321
   *                              </tr>
2322
   *                              <tr valign="top">
2323
   *                              <td><b>ENT_XHTML</b></td>
2324
   *                              <td>
2325
   *                              Handle code as XHTML.
2326
   *                              </td>
2327
   *                              </tr>
2328
   *                              <tr valign="top">
2329
   *                              <td><b>ENT_HTML5</b></td>
2330
   *                              <td>
2331
   *                              Handle code as HTML 5.
2332
   *                              </td>
2333
   *                              </tr>
2334
   *                              </table>
2335
   *                              </p>
2336
   * @param string $encoding      [optional] <p>
2337
   *                              Defines encoding used in conversion.
2338
   *                              </p>
2339
   *                              <p>
2340
   *                              For the purposes of this function, the encodings
2341
   *                              ISO-8859-1, ISO-8859-15,
2342
   *                              UTF-8, cp866,
2343
   *                              cp1251, cp1252, and
2344
   *                              KOI8-R are effectively equivalent, provided the
2345
   *                              <i>string</i> itself is valid for the encoding, as
2346
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2347
   *                              the same positions in all of these encodings.
2348
   *                              </p>
2349
   * @param bool   $double_encode [optional] <p>
2350
   *                              When <i>double_encode</i> is turned off PHP will not
2351
   *                              encode existing html entities, the default is to convert everything.
2352
   *                              </p>
2353
   *
2354
   * @return string The converted string.
2355
   * </p>
2356
   * <p>
2357
   * If the input <i>string</i> contains an invalid code unit
2358
   * sequence within the given <i>encoding</i> an empty string
2359 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2360
   * <b>ENT_SUBSTITUTE</b> flags are set.
2361 1
   */
2362
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // 'UTF-8' is passed through untouched; every other name is normalized first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2370
2371
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    if (extension_loaded('iconv')) {
      return true;
    }

    return false;
  }
2380
2381
  /**
   * Formats an integer as a hexadecimal code point string such as "U+0041".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to convert. Anything whose string form is not a plain run of decimal
   *                     digits (negative numbers, floats, text, empty string) is rejected.</p>
   * @param string $pfix [optional] <p>Prefix placed in front of the hex digits.</p>
   *
   * @return string <p>The prefix followed by at least four hex digits, or an empty string on failure.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros so that the result always has at least four digits.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2403 1
2404
  /**
   * Reports whether the "IntlChar" class can be used.
   *
   * Both conditions must hold: Bootup::is_php('7.0') returns true and the class "IntlChar" exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    // The class lookup is only attempted once the version check has passed.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2413
2414
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    if (extension_loaded('intl')) {
      return true;
    }

    return false;
  }
2423
2424
  /**
   * Alias of "UTF8::is_ascii()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2437
2438
  /**
   * Alias of "UTF8::is_base64()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2451 1
2452
  /**
   * Alias of "UTF8::is_binary()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>The input to check.</p>
   *
   * @return bool
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2465
2466
  /**
   * Alias of "UTF8::is_bom()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The value to compare against the known byte order marks.</p>
   *
   * @return bool
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2479 16
2480 16
  /**
   * Alias of "UTF8::is_html()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2493
2494
  /**
   * Alias of "UTF8::is_json()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2507
2508
  /**
   * Alias of "UTF8::is_utf16()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2521
2522
  /**
   * Alias of "UTF8::is_utf32()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2535
2536 1
  /**
   * Alias of "UTF8::is_utf8()": forwards both arguments and returns the result unchanged.
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through to UTF8::is_utf8().</p>
   *
   * @return bool
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2550 1
2551
  /**
   * Tells whether a string consists of 7-bit ASCII bytes only.
   *
   * @param string $str <p>The string to check; it is cast to string first.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if no byte in the range 0x80-0xFF is present (including the empty string)<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // Nothing to inspect: the empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // A single byte with the high bit set is enough to disqualify the string.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
2571
2572 1
  /**
   * Tells whether a string is base64 encoded.
   *
   * The string qualifies when strict decoding succeeds and encoding the decoded
   * bytes again yields exactly the original string.
   *
   * @param string $str <p>The input string; it is cast to string first.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded; always false for the empty string.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      // Strict decoding rejected the input.
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2593 4
2594
  /**
   * Heuristically checks whether the input looks like binary data.
   *
   * The input is reported as binary when it consists solely of the characters
   * "0" and "1", or when it contains at least one NUL byte.
   *
   * @param mixed $input <p>Scalars and objects with __toString() are checked as strings;
   *                     any other value (null, arrays, other objects) yields false.</p>
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // Values that cannot be represented as a string cannot match either check.
    $stringable = is_scalar($input) || (is_object($input) && method_exists($input, '__toString'));
    if (!$stringable) {
      return false;
    }

    $input = (string)$input;

    // A former third check, substr_count($input, '^ -~') / strlen($input) > 0.3,
    // was removed: it counted a literal four-byte substring, so the ratio could
    // never exceed 0.25 and the condition was never true.
    return preg_match('~^[01]+$~', $input) === 1
           ||
           substr_count($input, "\x00") > 0;
  }
2618 4
2619 4
  /**
   * Checks whether a file looks binary, based on its first 512 bytes.
   *
   * The leading block of the file is read and handed to UTF8::is_binary().
   * When the file cannot be opened or read, an empty block is checked instead.
   *
   * @param string $file <p>Path passed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle has to be checked before it is read from.
      if ($fp !== false) {
        $block = fread($fp, 512);
      }
    } catch (\Exception $e) {
      // Kept for setups where an error handler turns warnings into exceptions.
      $block = '';
    }

    // Release the handle on every path, including after a failed read.
    if (is_resource($fp)) {
      fclose($fp);
    }

    if (!is_string($block)) {
      $block = '';
    }

    return self::is_binary($block);
  }
2638
2639 3
  /**
   * Checks whether the given value is identical to one of the keys of self::$bom
   * (the class's table of "Byte Order Mark" strings).
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is strictly equal to a known Byte Order Mark,
   *              <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // Strict comparison against every key of the BOM table.
    return in_array($str, array_keys(self::$bom), true);
  }
2658
2659 3
  /**
   * Checks whether the string contains at least one html-tag, e.g. "<lall>".
   *
   * @param string $str <p>The input string; it is cast to string first.</p>
   *
   * @return bool <p><strong>true</strong> if an opening, closing or self-closing tag is found,
   *              <strong>false</strong> otherwise (including for the empty string).</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Tag name, optional attributes (bare, single- or double-quoted values), optional "/".
    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    $found = array();
    preg_match($tagPattern, $str, $found);

    return count($found) != 0;
  }
2685 3
2686 3
  /**
   * Try to check if "$str" is a json-string.
   *
   * The string qualifies when UTF8::json_decode() turns it into an object or an
   * array and json_last_error() reports no error. Bare scalars such as "1" or
   * "true" are not accepted.
   *
   * @param string $str <p>The input string; it is cast to string first.</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // A top-level JSON array decodes to a PHP array, not an object, so both
    // container types have to be accepted here.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
2711
2712 43
  /**
   * Check if the string is UTF-16.
   *
   * After removing a BOM, the string is only examined further if UTF8::is_binary()
   * accepts it. For each byte order the string is converted to UTF-8 and round-tripped;
   * if the round trip is stable, a score is built by counting how many keys of
   * self::count_chars() for the converted text are found (strictly) in the result of
   * self::count_chars() for the input. The byte order with the higher score wins.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16 (or both scores are equal),<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // Little-endian is probed first, then big-endian.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> UTF-16 -> UTF-8 must reproduce the first conversion.
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTripped, true) as $candidate => $unused) {
        if (in_array($candidate, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2771 9
2772 9
  /**
   * Check if the string is UTF-32.
   *
   * After removing a BOM, the string is only examined further if UTF8::is_binary()
   * accepts it. For each byte order the string is converted to UTF-8 and round-tripped;
   * if the round trip is stable, a score is built by counting how many keys of
   * self::count_chars() for the converted text are found (strictly) in the result of
   * self::count_chars() for the input. The byte order with the higher score wins.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32 (or both scores are equal),<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // Little-endian is probed first, then big-endian.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> UTF-32 -> UTF-8 must reproduce the first conversion.
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTripped = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTripped !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTripped, true) as $candidate => $unused) {
        if (in_array($candidate, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2831 33
2832 33
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * The empty string is considered valid. A string that ends in the middle of a
   * multi-octet sequence is rejected.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked; it is cast to string first.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT true,
    // although it relies on the /u modifier - confirm the condition is intended.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point accumulated so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. Always illegal, but rather than
          // resynchronizing we read to the end of the sequence and let the
          // length check below reject it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only continuation octets (10xxxxxx) are legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the sequence: $mUcs4 now holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. a lone lead byte), which is not valid UTF-8.
    return $mState === 0;
  }
2976
2977
  /**
2978 23
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2979
   * Decodes a JSON string
2980 23
   *
2981
   * @link http://php.net/manual/en/function.json-decode.php
2982
   *
2983
   * @param string $json    <p>
2984
   *                        The <i>json</i> string being decoded.
2985
   *                        </p>
2986
   *                        <p>
2987
   *                        This function only works with UTF-8 encoded strings.
2988
   *                        </p>
2989
   *                        <p>PHP implements a superset of
2990 1
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2991
   *                        only supports these values when they are nested inside an array or an object.
2992 1
   *                        </p>
2993
   * @param bool   $assoc   [optional] <p>
2994
   *                        When <b>TRUE</b>, returned objects will be converted into
2995
   *                        associative arrays.
2996 1
   *                        </p>
2997
   * @param int    $depth   [optional] <p>
2998
   *                        User specified recursion depth.
2999
   *                        </p>
3000
   * @param int    $options [optional] <p>
3001
   *                        Bitmask of JSON decode options. Currently only
3002
   *                        <b>JSON_BIGINT_AS_STRING</b>
3003
   *                        is supported (default is to cast large integers as floats)
3004
   *                        </p>
3005
   *
3006
   * @return mixed the value encoded in <i>json</i> in appropriate
3007 1
   * PHP type. Values true, false and
3008
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3009 1
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3010 1
   * <i>json</i> cannot be decoded or if the encoded
3011 1
   * data is deeper than the recursion limit.
3012
   */
3013 1
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through self::filter() before decoding.
    $filtered = self::filter($json);

    // The $options argument is only forwarded when Bootup::is_php('5.4') returns true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3025
3026 2
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" first and is then handed to the native "json_encode()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of the native JSON_* constants, e.g.
   *                       <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded to the native function if "Bootup::is_php('5.5')" reports support.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // older PHP versions do not know the third ("$depth") argument
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3074 16
3075 2
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string, or an empty string if the input is empty.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // nothing to lowercase -> do not pass an empty / failed substr() result into strtolower()
    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() may report a failure as "false", so both parts are cast back to string
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3086 15
3087 14
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), Unicode separators (\pZ)
   *                      and "other" / control characters (\pC) are stripped instead.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but it is a perfectly valid character to strip,
    // so it must not fall into the "no chars given" default branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    $chars = self::rxClass($chars);

    return preg_replace("/^{$chars}+/u", '', $str);
  }
3112 4
3113 4
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native max() cannot handle an empty list (warning on PHP < 8, ValueError on PHP >= 8)
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3128
3129 13
  /**
   * Determines the largest byte length among the UTF-8 encoded characters of the given string.
   *
   * The per-character byte sizes are taken from "UTF8::chr_size_list()".
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if there are no chars.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3146
3147
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal mbstring encoding is switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3162
3163 1
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native min() cannot handle an empty list (warning on PHP < 8, ValueError on PHP >= 8)
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3178
3179
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string|false <p>Whatever "UTF8::normalize_encoding()" returns for the given name.</p>
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3192
3193
  /**
   * Normalize the encoding-"name" input.
   *
   * Names that are exactly "UTF-8" or that are listed in "self::$iconvEncoding" are returned untouched.
   * Everything else is upper-cased and, if a known alias matches (after removing every character that is
   * neither alphanumeric nor whitespace), replaced by its canonical name. Results are memoized per input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      <strong>false</strong> if the given name is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    // memoized results, keyed by the original (un-normalized) input
    static $resolved = array();

    if (!$encoding) {
      return false;
    }

    // fast paths: already canonical
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resolved[$encoding])) {
      return $resolved[$encoding];
    }

    // alias (upper-case, stripped) => canonical name
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperName = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    // unknown names fall back to the plain upper-cased input
    $resolved[$encoding] = empty($aliases[$lookupKey]) ? $upperName : $aliases[$lookupKey];

    return $resolved[$encoding];
  }
3249 16
3250 15
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of "self::$utf8MSWord" found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The normalized string, an empty string for empty input.</p>
   */
  public static function normalize_msword($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // built once per request: [0] => search strings, [1] => replacements
    static $replacementPairs = null;

    if ($replacementPairs === null) {
      $replacementPairs = array(
          array_keys(self::$utf8MSWord),
          array_values(self::$utf8MSWord),
      );
    }

    return str_replace($replacementPairs[0], $replacementPairs[1], $str);
  }
3276
3277
  /**
   * Normalize the whitespace.
   *
   * Every entry of "self::$whitespaceTable" is replaced by a plain space and, unless requested otherwise,
   * every entry of "self::$bidiUniCodeControlsTable" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache slot must be derived from the same strict check that decides whether the
    // non-breaking space is dropped from the table; otherwise a truthy non-bool value (e.g. 1)
    // would store the full table in the slot that is later used for "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3323 1
3324 1
  /**
   * Format a number with grouped thousands.
   *
   * Thin wrapper around the native "number_format()" that additionally allows multi-byte separators
   * on PHP versions whose native function only accepts single-byte separators (PHP < 5.4).
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      [optional] <p>Number of decimal digits.</p>
   * @param string $dec_point     [optional] <p>Separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>Thousands separator.</p>
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The native function supports multi-byte separators since PHP 5.4, so the workaround
    // is only needed for older versions and as soon as *one* of the separators is multi-byte.
    if (
        PHP_VERSION_ID < 50400
        &&
        (isset($thousands_sep[1]) || isset($dec_point[1]))
    ) {
      // strtr() replaces in a single pass, so a "," inside of $dec_point
      // is never mistaken for a thousands separator afterwards
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3361
3362
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * If "IntlChar" support was detected, its result is used whenever it is truthy; otherwise the
   * code point is decoded from the first (up to four) bytes of the input and memoized.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 for empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // "0" is falsy but still a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $intlResult = \IntlChar::ord($chr);
      if ($intlResult) {
        return $intlResult;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    // 1-based list of the first (max. four) byte values
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      $codepoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      $codepoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      $codepoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing decodable): the lead byte value itself
      $codepoint = $lead;
    }

    $cache[$chr] = $codepoint;

    return $codepoint;
  }
3422
3423
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str    <p>The input string.</p>
   * @param array  $result <p>The result will be returned into this reference parameter.</p>
   *
   * @return bool <p>
   *              <strong>false</strong> if php can't parse the string or if the result is empty,
   *              <strong>true</strong> otherwise.
   *              </p>
   */
  public static function parse_str($str, &$result)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    $parsed = \mb_parse_str($cleaned, $result);

    return !($parsed === false || empty($result));
  }
3448
3449
  /**
   * Checks if the "/u" pattern modifier (Unicode support in PCRE) is available.
   *
   * The check runs an empty pattern with the "u" modifier; errors of that probe are silenced on purpose.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return $probe ? true : false;
  }
3459 45
3460
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digit strings are used as-is, hexadecimal
   * digit strings are converted via "UTF8::hex_to_int()", anything else via "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!($var1 && $var2)) {
      return array();
    }

    // resolve start and end (in that order) with the same rules
    $codepoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codepoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codepoint = (int)self::hex_to_int($boundary);
      } else {
        $codepoint = self::ord($boundary);
      }

      if (!$codepoint) {
        return array();
      }

      $codepoints[] = $codepoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($codepoints[0], $codepoints[1])
    );
  }
3506 52
3507
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3520 17
3521 2
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$bom" (BOM byte string => byte length) that the string starts with is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // substr() returns "false" instead of "" on PHP < 8 if the string consists of the BOM only,
        // so the result is cast to keep the documented string return type
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3538
3539 18
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $needles = array($what);
    } elseif (is_array($what)) {
      $needles = $what;
    } else {
      // neither string nor array: nothing to collapse
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
3562 1
3563 1
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * NOTE: The replacement runs repeatedly until nothing matches anymore, so "$replacement"
   *       must not itself contain one of the removed sequences.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (%00 - %1f) of the control characters.</p>
   * @param string $replacement [optional] <p>The string that is inserted instead of the invisible characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), so both spellings must be matched
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until stable: removing one sequence may glue a new one together (e.g. "%0%000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3596 36
3597
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>The string that is inserted instead of the diamond question mark.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the replacement character, once as escaped byte sequence and once as literal
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replacements = array(
        $unknown,
        $unknown,
    );

    return str_replace($needles, $replacements, $str);
  }
3619
3620
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), Unicode separators (\pZ)
   *                      and "other" / control characters (\pC) are stripped instead.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but it is a perfectly valid character to strip,
    // so it must not fall into the "no chars given" default branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    $chars = self::rxClass($chars);

    return preg_replace("/{$chars}+$/u", '', $str);
  }
3645
3646 36
  /**
   * Builds a regex fragment (character class and / or alternation) that matches the chars of "$s".
   *
   * The fragment is meant to be embedded into a pattern that uses "/" as delimiter.
   * Results are memoized per "$s . $class" combination.
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>The regex fragment.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 collects the content of the bracket expression, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the very front, where it cannot form a range
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes long: escape for use inside of the pattern
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // one (longer) character: add as it is
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $pattern;

    return $pattern;
  }
3694
3695
  /**
   * WARNING: Echoes the values of the native UTF8-Support lookup ("self::$support"), e.g. for debugging.
   *
   * Each value is printed on its own line, followed by "<br>".
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
3704
3705
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   *
   * @return string <p>The HTML numbered entity, an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // "0" is falsy but still a valid character (same guard as in UTF8::ord())
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
3729 6
3730 6
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regex; otherwise a byte-wise fallback is used
   * that only collects well-formed 1-4 byte sequences and skips everything else.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string
   *                           (only applied on the PCRE code path).</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    $chars = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "s" modifier: the dot also matches newlines
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the byte sequences by hand

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; ++$pos) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = substr($str, $pos, 2);

            ++$pos;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = substr($str, $pos, 3);

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = substr($str, $pos, 4);

            $pos += 3;
          }
        }
      }
    }

    // glue the single chars together into chunks of "$length" chars
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3812
3813
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in the following order and the first hit wins:
   * 1.) UTF-16 / UTF-32 (only for strings that "self::is_binary()" accepts)
   * 2.) ASCII
   * 3.) UTF-8
   * 4.) "\mb_detect_encoding()" in strict mode against a fixed candidate list
   * 5.) an "iconv()" round-trip for every entry of "self::$iconvEncoding"
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // run each detector only once and reuse its result for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding is accepted when converting the string to itself leaves it unchanged
    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
3905
3906
  /**
   * Check if the string ends with the given substring (case sensitive).
   *
   * Both arguments are cast to string; an empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // offset 0 only exists for non-empty strings (and, unlike empty(), accepts "0")
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // take as many trailing characters as the needle has and compare them
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
3929
3930
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * Both arguments are cast to string; an empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // compare the trailing part of the haystack with the needle via the case-folding comparison
    $tail = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3953
3954
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search values are turned into quoted, case-insensitive UTF-8 regular expressions
   * and applied via "preg_replace()". Replacement values are inserted literally:
   * sequences like "$1" or "\0" are NOT treated as back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search value never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string or an array of replacement strings.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one regex per search value
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match, so an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "\" and "$" in the replacement -> escape them to keep the text literal
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      foreach ($replace as $key => $value) {
        $replace[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replace = strtr((string)$replace, $escapeMap);
    }

    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
3997
3998
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * Both arguments are cast to string; an empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // the haystack starts with the needle when its first occurrence is at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4021
4022
  /**
   * Limit the number of characters in a string without cutting through a word.
   *
   * Strings that are not longer than $length are returned unchanged. Otherwise the string
   * is cut at $length characters, the trailing (possibly partial) word is dropped and
   * $strAddOn is appended. If the cut contains no space at all, the first ($length - 1)
   * characters are kept instead.
   *
   * @param string $str
   * @param int    $length
   * @param string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly on a space -> cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit and throw away the last space-separated piece
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $kept = implode(' ', $words);

    if ($kept !== '') {
      return $kept . $strAddOn;
    }

    // no word boundary inside the limit -> hard cut
    return self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4062
4063
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not positive,
   * is smaller than the length of $str, or when $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with (also avoids a division by zero below)
      return $str;
    }

    // number of characters that have to be added
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // for an odd $diff the extra character goes to the right side
        $left = (int)floor($diff / 2);
        $right = (int)ceil($diff / 2);
        $repeat = (int)ceil($diff / $ps_length / 2);

        $pre = self::substr(str_repeat($pad_string, $repeat), 0, $left);
        $post = self::substr(str_repeat($pad_string, $repeat), 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4117
4118
  /**
   * Repeat a string.
   *
   * The input is passed through "self::filter()" before it is handed to the native "str_repeat()".
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);
    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4142
4143
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for the native "str_replace()", all arguments are forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4176
4177
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "self::split()", the pieces are shuffled with the native
   * "shuffle()" and glued back together.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4192 8
4193 8
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4218 1
4219
  /**
   * Split a string into an array of pieces with $len grapheme clusters each.
   *
   * An empty input yields an empty array. For $len smaller than 1 the call is delegated
   * to the native "str_split()".
   *
   * @param string $str
   * @param int    $len
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // start a new chunk on every $len-th grapheme, otherwise append to the current one
    $chunks = array();
    $chunkIndex = -1;
    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4263
4264
  /**
   * Check if the string starts with the given substring (case sensitive).
   *
   * Both arguments are cast to string; an empty haystack or an empty needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // the haystack starts with the needle when its first occurrence is at position 0
    return self::strpos($haystack, $needle) === 0;
  }
4287 2
4288
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes are read as one hexadecimal number and converted to base 2 with "base_convert()".
   * NOTE(review): "base_convert()" is documented to lose precision on large numbers, so long
   * inputs are presumably not converted exactly -- confirm before relying on it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    // unpack() returns a 1-indexed array, the hex string is element 1
    $unpacked = unpack('H*', (string)$str);
    $hex = $unpacked[1];

    return base_convert($hex, 16, 2);
  }
4303
4304
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4319 8
4320 3
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a capturing regex, so the resulting parts alternate between
   * non-word text (even indexes) and words (odd indexes).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $partCount = count($parts);

    // format 1: plain list of words
    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: words keyed by their character offset in the input
    if ($format === 2) {
      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        // skip the word itself and the separator that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // any other format: only count the words
    return ($partCount - 1) / 2;
  }
4364
4365 6
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are case-folded
   *       via "UTF8::strtocasefold()" before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4383
4384
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
4401 60
4402 2
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are normalized
   * to NFD and compared with the native "strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // cheap path: the string representations are already identical
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4422
4423
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the segment.</p>
   * @param int    $offset   [optional] <p>Start position inside $str.</p>
   * @param int    $length   [optional] <p>Length of the part of $str that is examined.</p>
   *
   * @return int|null <p>
   *                  The number of characters before the first character of $charList
   *                  (or the whole length if none is found),<br />
   *                  <strong>null</strong> if $charList or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when a non-default offset / length was requested
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return null;
    }

    // lazily capture everything in front of the first character from the list
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
4455
4456
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
4473
4474
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted with "UTF8::chr()" and the results are concatenated in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4495
4496
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The known BOMs are the keys of "self::$bom".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // only the keys (the BOM byte sequences) are needed here
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4513 1
4514
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * The input is passed through "self::clean()" first and then handed to the native "strip_tags()".
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4540 15
4541 2
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * Both strings are cast to string; an empty haystack or an empty needle always yields false.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching, <strong>null</strong> is treated as 0
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // "\mb_stripos()" expects an integer offset, so the "null"-default must be converted explicitly
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4593
4594
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end,
   * case insensitive.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found or empty.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $found = \mb_stristr($haystack, $needle, $before_needle, $encoding);

    return $found;
  }
4627
4628
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding);
    }

    // single-byte encodings: the byte length is the character length
    // (loose comparison on purpose, it mirrors the semantics of a "switch")
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
4670
4671
  /**
   * Case-insensitive "natural order" comparison of two strings.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then compared via UTF8::strnatcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4687
4688
  /**
   * "Natural order" comparison of two strings.
   *
   * INFO: both strings are passed through UTF8::strtonatfold() before PHP's strnatcmp() compares them
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: identical strings need no folding at all
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
4706
4707 1
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then compared via UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4724
4725
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are shortened via UTF8::substr() and then compared via UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() can also return false, but UTF8::strcmp() only expects strings,
    // so the result is normalized to a string (false becomes '')
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
4745 10
4746
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also false if haystack or char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // build a regex character class out of the char list and look for the first hit
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // byte-wise functions are sufficient here: the matched character is located by its bytes
    $firstHit = strpos($haystack, $m[0]);

    return substr($haystack, $firstHit);
  }
4771
4772
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool       $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a non-negative integer is converted
    // via UTF8::chr() first; this has to happen BEFORE the string cast below, otherwise the
    // strict integer check can never match
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // a negative offset is replaced by 0 here, to keep the behavior
      // compatible with old php versions (php < 5.5.35, < 5.6.21, < 7.0.6)
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // fallback: cut off the skipped characters, search byte-wise
    // and translate the byte position back into a character position

    if ($offset > 0) {
      // UTF8::substr() can also return false, normalize it to a string
      $haystack = (string)self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4850
4851
  /**
   * Finds the last occurrence of a character in a string within another; the result is produced by
   * "\mb_strrchr()".
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use, the default is "UTF-8".
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encodings need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong match position,
      // so both strings are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
  }
4889 1
4890 1
  /**
   * Reverses characters order in the string.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence (an empty string for empty input)
   */
  public static function strrev($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // split the string into its parts, flip their order and glue them together again
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
4907 11
4908
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive; the result is
   * produced by "\mb_strrichr()".
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use, the default is "UTF-8".
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encodings need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong match position,
      // so both strings are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
  }
4946
4947
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * INFO: haystack and needle are lowercased via UTF8::strtolower() and then searched via UTF8::strrpos()
   *
   * @param string $haystack  <p>The string to look in.</p>
   * @param string $needle    <p>The string to look for.</p>
   * @param int    $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset, $encoding, $cleanUtf8);
  }
4965
4966 16
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool       $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted via UTF8::chr() (before the string cast)
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    $offset = (int)$offset;

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // INFO: use "mb_"-function (with polyfill) also if we need another encoding
    $useMbString = ($encoding !== 'UTF-8' || self::$support['mbstring'] === true);
    if ($useMbString) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the "iconv" flag guards a "grapheme_*" call here - confirm that this is intended
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: shorten the haystack according to the offset, search byte-wise
    // and translate the byte position back into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $pos = strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    $left = substr($haystack, 0, $pos);

    // only a positive offset was cut off at the beginning and needs to be added again
    $skipped = $offset > 0 ? $offset : 0;

    return $skipped + self::strlen($left);
  }
5054
5055
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the part of $str that is checked, passed on to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the part of $str that is checked, passed on to UTF8::substr().</p>
   *
   * @return int <p>The length of the matching initial segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    $offset = (int)$offset;
    $length = (int)$length;

    // the string only needs to be cut if a non-default offset / length was given
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0]) || !isset($mask[0])) {
      return 0;
    }

    // match the leading run of mask-characters and measure it in characters
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5083
5084 43
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * INFO: the search is done by "\mb_strstr()" or by "\grapheme_strstr()"
   *
   * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, strstr() returns the part of the
   *                              haystack before the first occurrence of the needle (excluding the needle).
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to a wrong match position,
      // so both strings are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // only non-default encodings need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "grapheme_strstr" is only used for UTF-8 without mbstring support
    if ($encoding === 'UTF-8' && self::$support['mbstring'] !== true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // INFO: use "mb_"-function (with polyfill) also if we need another encoding
    return \mb_strstr($haystack, $needle, $before_needle, $encoding);
  }
5125
5126
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $full      [optional] <p>
   *                          <b>true</b>, replace full case folding chars (default)<br />
   *                          <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded and lowercased string, an empty string for empty input.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // the lookup tables are built / loaded only once and cached between calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    // step 1: the limited, common case folding
    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2: the full case folding (optional)
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5177
5178 1
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str       <p>The string being lowercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string The result of "\mb_strtolower()" for str, an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the case conversion
      $str = self::clean($str);
    }

    // only non-default encodings need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtolower($str, $encoding);
  }
5210 1
5211
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * INFO: the string is normalized to NFD and all runs of "\p{Mn}" characters are removed afterwards
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5223 1
5224 1
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str       <p>The string being uppercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string The result of "\mb_strtoupper()" for str, an empty string for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the case conversion
      $str = self::clean($str);
    }

    // only non-default encodings need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtoupper($str, $encoding);
  }
5255
5256 1
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters (string) or items (array) to search for. If $to is omitted,
   *                              this must be an array of "search => replacement" pairs.</p>
   * @param string|string[] $to   [optional] <p>The characters (string) or items (array) that replace the
   *                              corresponding entry of $from.</p>
   *
   * @return string <p>
   *                A copy of $str in which every occurrence of an entry of $from was replaced by the
   *                corresponding entry of $to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // only strings have to be split into single characters, arrays already are lists of items
      $search = is_array($from) ? array_values($from) : self::str_split($from);
      $replace = is_array($to) ? array_values($to) : self::str_split($to);

      // pair the items one-to-one, surplus items on either side are dropped
      $pairs = min(count($search), count($replace));

      if ($pairs === 0) {
        // nothing to translate (and array_combine() rejects empty arrays on PHP < 5.4)
        return (string)$str;
      }

      $from = array_combine(
          array_slice($search, 0, $pairs),
          array_slice($replace, 0, $pairs)
      );
    }

    // two-argument form: $from is the "search => replacement" map
    return strtr($str, $from);
  }
5289 1
5290 1
  /**
   * Return the width of a string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The width as reported by "\mb_strwidth()".</p>
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding has to be normalized
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding) : $encoding;

    // mbstring is not tolerant to invalid byte sequences, so sanitize the input on request
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \mb_strwidth($input, $charset);
  }
5314
5315
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The sub-string specified by the start and length parameters, an empty string for
   *                      empty input, or false if $start lies behind the end of the string.</p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring do not cope with invalid byte sequences
      $str = self::clean($str);
    }

    // the character count is only needed for the range check and for the default length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $charCount) {
      return false;
    }

    $length = $length === null ? $charCount : (int)$length;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isDefaultEncoding = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding);

    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length, $encoding);
    }

    // fallback: split into single characters, cut out the wanted range and glue it together again
    $chars = self::split($str);

    return implode(array_slice($chars, $start, $length));
  }
5389
5390 13
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * @param string  $main_str           <p>The main string being compared.</p>
   * @param string  $str                <p>The secondary string being compared.</p>
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>The result of "UTF8::strcmp()" or "UTF8::strcasecmp()" for the compared parts.</p>
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // "UTF8::substr()" answers an out-of-range offset with false -> compare against an empty string instead
    $main_str = (string)self::substr($main_str, $offset, $length);

    // the secondary string is cut down to the length of the main part
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5411
5412 1
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If omitted, the search runs up to the end of the haystack.
   *                           </p>
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty (or if a
   *                   negative range is used on PHP < 7.1).</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // keep "null" as "up to the end of the string": casting it to 0 would cut the haystack down to ''
      if ($length !== null) {
        $length = (int)$length;
      }

      // the native "substr_count()" supports negative offsets / lengths only as of PHP 7.1
      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // "UTF8::substr()" answers an out-of-range offset with false -> nothing left to search in
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // mbstring reports wrong results if invalid characters are part of the input
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_substr_count($haystack, $needle, $encoding);
  }
5467 1
5468 2
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if the prefix does not match.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $subject = (string)$haystack;
    $prefix = (string)$needle;

    if ($subject === '') {
      return '';
    }

    // an empty prefix or a prefix that does not match leaves the string untouched
    if ($prefix === '' || self::str_istarts_with($subject, $prefix) !== true) {
      return $subject;
    }

    return self::substr($subject, self::strlen($prefix));
  }
5495 1
5496
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if the suffix does not match.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $subject = (string)$haystack;
    $suffix = (string)$needle;

    if ($subject === '') {
      return '';
    }

    // an empty suffix or a suffix that does not match leaves the string untouched
    if ($suffix === '' || self::str_iends_with($subject, $suffix) !== true) {
      return $subject;
    }

    // keep everything in front of the suffix
    $keep = self::strlen($subject) - self::strlen($suffix);

    return self::substr($subject, 0, $keep);
  }
5523
5524 2
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if the prefix does not match.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // the prefix check is skipped completely for an empty needle
    $hasPrefix = $needle !== '' && self::str_starts_with($haystack, $needle) === true;

    return $hasPrefix ? self::substr($haystack, self::strlen($needle)) : $haystack;
  }
5551
5552
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start       <p>The character position(s) where the replacement starts.</p>
   * @param int|int[]|void  $length      [optional] <p>The number of characters to replace. For a single string
   *                                     the default is the rest of the string, for an array of strings the
   *                                     default is 0 (the replacement is inserted).</p>
   *
   * @return string|string[] <p>A string for string input, an array of strings for array input.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one replacement per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one offset per input string, non-integer offsets fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one length per input string
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      // a single input string only uses the first replacement (whatever its key is)
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // split both strings into arrays of single UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      $length = (int)\mb_strlen((string)$str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // INFO: the separator has to be the first argument, the legacy order is no longer accepted by PHP 8
    return implode('', $smatches[0]);
  }
5627 6
5628 6
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if the suffix does not match.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    if ($needle !== '') {
      if (self::str_ends_with($haystack, $needle) === true) {
        // cut the string right in front of the suffix
        $remaining = self::strlen($haystack) - self::strlen($needle);

        return self::substr($haystack, 0, $remaining);
      }
    }

    return $haystack;
  }
5655 19
5656
  /**
   * Returns a case swapped version of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is processed
      $str = self::clean($str);
    }

    // a character that already equals its upper-case form gets lowered, every other one gets raised
    $flipCase = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    // every non-whitespace character is handled on its own
    return preg_replace_callback('/[\S]/u', $flipCase, $str);
  }
5699
5700 16
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>Forwarded as the input string.</p>
   * @param string $subst_chr <p>Forwarded as the second argument of "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Forwarded as the third argument of "UTF8::to_ascii()".</p>
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
5715
5716
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str
   *
   * @return string|string[]
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
5729
5730 1
  /**
   * Alias of "UTF8::to_latin1()": the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
5743
5744 8
  /**
   * Alias of "UTF8::to_utf8()": the argument is passed through unchanged.
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
5757 7
5758
  /**
   * Convert a string into ASCII.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is used but "intl" is not supported or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lookup tables ("banks") that were already loaded, kept between the calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, false, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // the lead byte tells the length of the sequence and carries the highest bits of the code point
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $seqLength = 2;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $seqLength = 3;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $seqLength = 4;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $seqLength = 5;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $seqLength = 6;
        $ord = $ordC0 - 252;
      } else {
        // 128-191 can not start a sequence and 254-255 are never valid: there is no code point to look up
        $c = $unknown;
        continue;
      }

      // a sequence that is shorter than announced can not be decoded
      if (!isset($c[$seqLength - 1])) {
        $c = $unknown;
        continue;
      }

      // every following byte adds six more bits: ord = ord * 64 + (byte - 128)
      for ($i = 1; $i < $seqLength; ++$i) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      // the tables are split into banks of 256 code points, one data file per bank
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the data file is presumably filling $UTF8_TO_ASCII[$bank] - it is not visible from here
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the code point within its bank
      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so that $c can not modify the last element by accident
    unset($c);

    return implode('', $chars);
  }
5886
5887
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), their keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (!is_array($str)) {
      $input = (string)$str;

      return $input === '' ? '' : self::utf8_decode($input);
    }

    $converted = array();

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $item) {
      $converted[$key] = self::to_iso8859($item);
    }

    return $converted;
  }
5916
5917
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns for the given input.</p>
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
5930
5931
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes unicode escape sequences ("\uXXXX") and numeric HTML entities ("&#NN;" with 2-4 digits).
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B", bytes 0xA1-0xBF)
   *                                    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str <p>Any string or array (arrays are converted element by element).</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 7-bit ASCII: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 < 0xC0) {

        // stray byte 0x80-0xBF: check the Windows-1252 special cases first
        if (isset(self::$win1252ToUtf8[$ordC1])) {
          $buf .= self::$win1252ToUtf8[$ordC1];
          continue;
        }

      } else {

        // possible UTF-8 lead byte: how many continuation bytes must follow?
        if ($ordC1 <= 0xDF) {
          $trailBytes = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xEF) {
          $trailBytes = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xF7) {
          $trailBytes = 3; // looks like 4 bytes UTF8
        } else {
          $trailBytes = 0; // 0xF8-0xFF never starts a sequence handled here
        }

        // all continuation bytes must exist and be in the range 0x80-0xBF
        $isUtf8 = $trailBytes > 0 && $i + $trailBytes < $max;
        for ($j = 1; $isUtf8 && $j <= $trailBytes; $j++) {
          $ordTrail = ord($str[$i + $j]);
          $isUtf8 = $ordTrail >= 0x80 && $ordTrail <= 0xBF;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $trailBytes + 1);
          $i += $trailBytes;
          continue;
        }
      }

      // not valid UTF8 - convert the single byte as ISO-8859-1 into its two-byte
      // UTF-8 form (integer shift instead of float division, so chr() never
      // receives a fractional value)
      $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
    }

    // decode unicode escape sequences
    $decoded = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );
    if ($decoded !== null) { // null signals a PCRE error: keep the buffer we have
      $buf = $decoded;
    }

    // decode numeric HTML entities
    $decoded = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );
    if ($decoded !== null) {
      $buf = $decoded;
    }

    return $buf;
  }
6060
6061
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but
   * checking for that costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; when omitted or empty,
   *                      characters of the Unicode categories "Z" and "C" are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // explicit character list given: delegate to the one-sided helpers
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6089
6090
  /**
   * Makes the first character of a string uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The uppercased first character followed by the unchanged rest.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return $firstCharUpper . $remainder;
  }
6103 6
6104
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6119
6120
  /**
   * Uppercase the first character of all words in the string.
   *
   * The string is split into word chunks and the text between them; every chunk
   * that is not listed in $exceptions is passed through "UTF8::ucfirst()" and the
   * chunks are glued together again in their original order.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The converted string, an empty string for empty input or if the split fails.</p>
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // compare against the empty string explicitly: a plain "!$str" would also discard "0"
    if (!isset($str[0])) {
      return '';
    }

    $charlist = self::rxClass($charlist, '\pL');
    $words = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns false on failure (e.g. malformed UTF-8 with the "u" modifier)
    if ($words === false) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach ($words as $word) {

      // skip only truly empty chunks; "0" is falsy in PHP but must be kept
      if ($word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6170
6171
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Uses the native "urldecode()", so a "+" is decoded into a space.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string stops changing).</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal HTML entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags |= ENT_HTML5;
    }

    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6221 1
6222
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Uses the native "rawurldecode()", so a "+" is left untouched.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string stops changing).</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal HTML entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags |= ENT_HTML5;
    }

    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6272
6273
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The table has one entry per byte from "%20" to "%FF" (upper-case hex keys):
   * - "%20" - "%7E" map to the corresponding ASCII character,
   * - "%7F" maps to an empty string,
   * - "%80" - "%9F" map to the Windows-1252 characters from self::$win1252ToUtf8
   *   (e.g. "%80" => "€"); bytes without an entry in that table map to an empty string,
   * - "%A0" - "%FF" map to the UTF-8 form of the ISO-8859-1 character
   *   (including the invisible "%A0" NO-BREAK SPACE and "%AD" SOFT HYPHEN).
   *
   * The table is built once per request and cached in a static variable.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = null;

    if ($array === null) {
      $array = array();

      for ($byte = 0x20; $byte <= 0xFF; $byte++) {

        if ($byte < 0x7F) {
          // printable ASCII
          $char = chr($byte);
        } elseif ($byte === 0x7F) {
          // DEL is dropped
          $char = '';
        } elseif ($byte < 0xA0) {
          // Windows-1252 specials; undefined code points are dropped
          $char = isset(self::$win1252ToUtf8[$byte]) ? self::$win1252ToUtf8[$byte] : '';
        } else {
          // ISO-8859-1 byte -> two-byte UTF-8 sequence
          $char = chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
        }

        $array[sprintf('%%%02X', $byte)] = $char;
      }
    }

    return $array;
  }
6511
6512
  /**
   * Decodes a UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized with "UTF8::to_utf8()", then every key of
   * self::$utf8ToWin1252 is replaced by its value before the result is handed
   * to the polyfill decoder.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // lookup lists are derived from the mapping table only once
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $utf8 = (string)self::to_utf8($str);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $prepared = str_replace($search, $replace, $utf8);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($prepared);
  }
6541
6542
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, every key of self::$cp1252ToUtf8 found in the
   * result is replaced by its value.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // lookup lists are derived from the mapping table only once
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte the replacement step is skipped entirely
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6575
6576
  /**
   * Fix utf8-win1252 chars; plain forwarder to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6589
6590
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6606
6607
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Appended to the result when the string was actually shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1; the unchanged input
   *                if nothing had to be cut off; otherwise the shortened string plus $strAddOn.
   *                </p>
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $words = (int)$words;

    if ($str === '' || $words < 1) {
      return '';
    }

    // leading whitespace followed by at most $words "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // shorten only if the match exists and does not already cover the whole string
    if (isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6642
6643
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte "mask" (one byte per character:
   * " " for a space, "?" for any other character, "#" for an existing break), the
   * mask is wrapped with the native "wordwrap()" and the resulting break
   * positions are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column; an empty string if $str or $break is empty.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $index => $segment) {

      // every segment after the first one was preceded by a break in the input
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $strReturn = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($wrappedMask, '#', $breakPos + 1)) {

      // copy all characters in front of the current break position
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break replaces an original break or a single space
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $strReturn .= $break;
    }

    // whatever is left belongs to the last line
    return $strReturn . implode('', $chars);
  }
6710
6711
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6720
6721
}
6722