Completed
Push — master ( 2ffeb6...8b3ae6 )
by Lars
03:06
created

UTF8::single_chr_html_encode()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 23
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 5.583

Importance

Changes 0
Metric Value
dl 0
loc 23
ccs 10
cts 14
cp 0.7143
rs 8.5906
c 0
b 0
f 0
cc 5
eloc 12
nc 4
nop 3
crap 5.583
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    // HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
   * Constructor.
   *
   * Triggers the environment detection so the static support flags are
   * populated as soon as an instance is created.
   */
  public function __construct()
  {
    // fills self::$support; returns immediately if the check already ran
    self::checkForSupport();
  }
816
817
  /**
   * Fetch a single character from a string by its position, i.e. the
   * multi-byte counterpart of the $str[1] syntax.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    // exactly one character is requested from self::substr()
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
829
830
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false
   *       is returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // a BOM was detected -> hand the input back as it is
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
847
848
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zero bits are
   * dropped, the remainder is left-padded to a whole number of bytes and
   * every group of 8 bits becomes one byte. Characters other than "0" and
   * "1" are ignored. An input without any "1" bit yields a single NUL byte.
   *
   * INFO: The conversion is done byte by byte, so inputs of any length are
   *       converted exactly, and e.g. "1010" gives "\x0a" (not "\xa0").
   *
   * @param mixed $bin 1|0
   *
   * @return string
   */
  public static function binary_to_str($bin)
  {
    // keep only real binary digits
    $bits = preg_replace('/[^01]/', '', (string)$bin);

    // leading zero bits carry no value; keep one digit so "0" / "" still map to "\x00"
    $bits = ltrim($bits, '0');
    if ($bits === '') {
      $bits = '0';
    }

    // left-pad to a byte boundary so the lowest bit stays the lowest bit
    $byteCount = (int)ceil(strlen($bits) / 8);
    $bits = str_pad($bits, $byteCount * 8, '0', STR_PAD_LEFT);

    // one output byte per group of 8 bits
    $str = '';
    foreach (str_split($bits, 8) as $byteBits) {
      $str .= chr(bindec($byteBits));
    }

    return $str;
  }
859
860 2
  /**
   * Get the UTF-8 Byte Order Mark.
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    // the three bytes EF BB BF are U+FEFF encoded as UTF-8
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
869
870
  /**
   * Alias of UTF8::chr_map(): the arguments are passed through unchanged
   * and its result is returned.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
883
884 2
  /**
   * Detect which UTF-8 related features the server environment offers and
   * remember the result in self::$support.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection already ran -> nothing left to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // the marker is set first, before any of the probes below is called
    self::$support['already_checked_via_portable_utf8'] = true;

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
902
903
  /**
   * Build the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default "UTF-8" encoding the work is delegated to \IntlChar::chr()
   * when the "intlChar" support flag is set. Otherwise the UTF-8 bytes are
   * assembled by hand, converted to the requested encoding if that is not
   * UTF-8, and remembered in a static cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure to encode.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // only real integers are accepted (no numeric strings, no floats)
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // static cache for everything that was not answered by "IntlChar";
    // the key is built from the code point as passed in, before the modulo below
    static $cache = array();
    $cacheKey = $code_point . $encoding;
    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    // wrap the value into the 21 bit range a 4-byte sequence can carry
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 1 byte: 0xxxxxxx
      $str = chr($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $lead = chr(0xC0 | ($code_point >> 6));
      $tail = chr(0x80 | ($code_point & 0x3F));
      $str = $lead . $tail;
    } elseif ($code_point < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $lead = chr(0xE0 | ($code_point >> 12));
      $middle = chr(0x80 | (($code_point >> 6) & 0x3F));
      $tail = chr(0x80 | ($code_point & 0x3F));
      $str = $lead . $middle . $tail;
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $lead = chr(0xF0 | ($code_point >> 18));
      $second = chr(0x80 | (($code_point >> 12) & 0x3F));
      $third = chr(0x80 | (($code_point >> 6) & 0x3F));
      $tail = chr(0x80 | ($code_point & 0x3F));
      $str = $lead . $second . $third . $tail;
    }

    // the bytes above are UTF-8; convert them if another encoding was requested
    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    $cache[$cacheKey] = $str;

    return $str;
  }
962
963 1
  /**
   * Run a callback on every character of a string and collect the results.
   *
   * The string is broken up with UTF8::split() and the pieces are handed
   * to array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
977
978
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * The string is split with UTF8::split() and strlen() is applied to every
   * piece. Empty input (e.g. "" or null) yields an empty array; the string
   * "0" is a regular one-character input and is passed on to the split.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character.</p>
   */
  public static function chr_size_list($str)
  {
    // "0" (and the integer 0) are falsy in PHP, but they are not empty input
    if (!$str && $str !== '0' && $str !== 0) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
998 2
999
  /**
   * Decodes the leading UTF-8 sequence of a string into its integer code point.
   *
   * The first byte decides how many bytes belong to the sequence; the payload
   * bits of the lead byte and of each continuation byte are then merged,
   * six bits per continuation byte.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code point.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $lead = self::ord($char[0]);

    // 0xxxxxxx: a single byte is its own code point
    if (!($lead & 0x80)) {
      return $lead;
    }

    // derive the sequence length from the lead byte and drop its marker bits;
    // an unrecognised lead byte is kept as-is with a length of one
    $length = 1;
    $value = $lead;

    if (($lead & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value = $lead & ~0xc0;
    } elseif (($lead & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value = $lead & ~0xe0;
    } elseif (($lead & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value = $lead & ~0xf0;
    }

    // 10xxxxxx: append the payload of every continuation byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
1038
1039 1
  /**
   * Returns the hexadecimal code point notation (e.g. U+xxxx) of a UTF-8 character.
   *
   * The code point is looked up with UTF8::ord() and formatted by UTF8::int_to_hex().
   *
   * @param string $char <p>The input character.</p>
   * @param string $pfix [optional] <p>The prefix handed to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1051
1052
  /**
   * Cuts a string into chunks and glues them back together with a line ending.
   *
   * The chunks come from UTF8::split() and are joined with implode(), so the
   * line ending is placed between chunks and not after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) inserted between the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1065
1066
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Processing order: invalid bytes are dropped, the replacement character and
   * invisible characters are removed, then the optional whitespace, MS Word and
   * BOM steps run in that order.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string; an empty string if the regex engine reports an error.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Only the first group (runs of well-formed sequences) is kept by the
    // replacement; stray continuation or lead bytes match groups 2 / 3 and vanish.
    $regx = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($regx, '$1', $str);

    // preg_replace() returns null when PCRE fails (e.g. a backtrack limit is hit);
    // do not feed null into the helpers below, report "nothing clean left" instead.
    if ($str === null) {
      return '';
    }

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
1114 4
1115
  /**
   * Repairs the UTF-8 encoding of a string and strips everything that is not printable.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, or an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($input);

    // clean() is asked to:
    // - remove all non UTF-8 symbols, the diamond question mark (�)
    //   and invisible characters (e.g. "\0")
    // - remove the BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($repaired, true, true, false, true);
  }
1142
1143
  /**
   * Converts a string, or an array of strings, into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string argument is first broken up with UTF8::split(); an array argument
   * is used as given. Every element is then mapped through UTF8::ord().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If true, the code points are additionally mapped through
   *                                    UTF8::int_to_hex() (U+xxxx format); by default they are
   *                                    returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $points);
  }
1180
1181
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string
   *                          (forwarded to UTF8::split()).</p>
   *
   * @return array <p>An associative array with the characters as keys and
   *               their number of occurrences as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1194 1
1195
  /**
   * Builds the UTF-8 character for a decimal code point.
   *
   * The code point is written as a hexadecimal numeric HTML entity, which
   * mbstring then converts from "HTML-ENTITIES" to UTF-8.
   *
   * @param int $code <p>The decimal code point.</p>
   *
   * @return string <p>The UTF-8 encoded character.</p>
   */
  public static function decimal_to_chr($code)
  {
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1210
1211
  /**
   * Converts a string into another charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()" this function also tries to repair broken / double encoding,
   *        so it can be called on a string that already is UTF-8 without messing it up.
   *
   * NOTE:  The target encoding is the FIRST argument, the string the second.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise the conversion is skipped when the detected encoding already matches</p>
   *
   * @return string <p>The converted string, or the input string if nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to convert, or no target encoding given
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encodingDetected = self::str_detect_encoding($str);

    // give up if the detection failed, or if the string already has the
    // requested encoding and the caller did not force a conversion
    if (!$encodingDetected || ($force !== true && $encodingDetected === $encoding)) {
      return $str;
    }

    $forced = ($force === true);

    // dedicated converter for UTF-8 targets
    if (
        $encoding === 'UTF-8'
        &&
        ($forced || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true))
    ) {
      return self::to_utf8($str);
    }

    // dedicated converter for ISO-8859-1 targets
    if (
        $encoding === 'ISO-8859-1'
        &&
        ($forced || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true))
    ) {
      return self::to_iso8859($str);
    }

    // everything else goes through mbstring; a falsy result keeps the input
    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    return $strEncoded ? $strEncoded : $str;
  }
1287
1288
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The value is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before use.
   *                                     </p>
   * @param int|null      $flags         [optional] <p>
   *                                     Forwarded as the second argument (use_include_path) of the
   *                                     native function, e.g. FILE_USE_INCLUDE_PATH to search
   *                                     the include path. null is treated as "off".
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If none is given and $timeout is
   *                                     non-zero, a context carrying the http timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts. null is treated as 0.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached; only integer values are forwarded.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout (http option of the
   *                                     auto-created context).</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() yields false when the value cannot be filtered; there is no path to read then
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function expects a boolean and an integer here. null only
    // worked through implicit coercion (deprecated as of PHP 8.1), so convert
    // explicitly; the resulting values are the same ones the coercion produced.
    $use_include_path = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $use_include_path, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // convert only if the detected encoding is not UTF-8 already, then sanitize
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1405 1
1406
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (including when the file cannot be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // an unreadable file has no detectable BOM; do not pass false on as if it were content
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1417
1418
  /**
   * Normalizes a value to UTF-8 in the requested normalization form.
   *
   * Arrays and objects are walked recursively; strings are normalized; every
   * other type is returned untouched. For strings, carriage returns are
   * converted to "\n" first, and non-ASCII strings are normalized with
   * \Normalizer, falling back to UTF8::encode('UTF-8', ...) when the
   * normalizer yields nothing.
   *
   * @param mixed  $var                <p>The value to normalize.</p>
   * @param int    $normalization_form <p>The \Normalizer form, NFC by default.</p>
   * @param string $leading_combining  <p>Prefix put in front of a string that starts with a combining mark.</p>
   *
   * @return mixed <p>The normalized value, same type as the input.</p>
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    if (is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (is_object($var)) {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!is_string($var)) {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      $hasNormalForm = true;
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // an empty / failed normalization result means the input was not usable as-is
      $hasNormalForm = isset($normalized[0]);
      $var = $hasNormalForm ? $normalized : self::encode('UTF-8', $var);
    }

    if (
        $var[0] >= "\x80"
        &&
        $hasNormalForm
        &&
        isset($leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1479
1480
  /**
   * "filter_input()"-wrapper whose result is passed through UTF8::filter().
   *
   * Gets a specific external variable by name, optionally filters it and
   * then normalizes the result.
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when the argument is actually supplied.
   *                              </p>
   *
   * @return mixed <p>The result of the native filter_input() call after UTF8::filter() was applied;
   *               see the manual page for the <b>FALSE</b> / <b>NULL</b> conventions of the native function.</p>
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // hand $options to the native function only if the caller passed it
    $var = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1520 1
1521
  /**
   * "filter_input_array()"-wrapper whose result is passed through UTF8::filter().
   *
   * Gets external variables, optionally filters them and then normalizes the result.
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments: each key is a variable name, each value
   *                          either a filter type or an array with the keys filter, flags and options.
   *                          May also be an integer holding a filter constant, in which case all values
   *                          in the input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed <p>The result of the native filter_input_array() call after UTF8::filter() was applied;
   *               see the manual page for the <b>FALSE</b> / <b>NULL</b> conventions of the native function.</p>
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with only $type given, let the native function use its own defaults
    $a = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($a);
  }
1568
1569
  /**
   * "filter_var()"-wrapper whose result is passed through UTF8::filter().
   *
   * Filters a variable with a specified filter and then normalizes the result.
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>
   *                        Value to filter.
   *                        </p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The
   *                        manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If filter
   *                        accepts options, flags can be provided in "flags" field of array. For
   *                        the "callback" filter, a callable should be passed that accepts the value
   *                        to be filtered and returns the filtered value.
   *                        Only forwarded when the argument is actually supplied.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // filters that accept options
   *                        $options = array(
   *                        'options' => array('default' => 3, 'min_range' => 0),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // filters that only accept flags
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        </code>
   *                        </p>
   *
   * @return mixed <p>The result of the native filter_var() call (the filtered data, or <b>FALSE</b>
   *               if the filter fails) after UTF8::filter() was applied.</p>
   * @since 5.2.0
   */
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // hand $options to the native function only if the caller passed it
    $variable = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    return self::filter($variable);
  }
1638 1
1639
  /**
1640
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1641
   *
1642
   * Gets multiple variables and optionally filters them
1643
   *
1644
   * @link  http://php.net/manual/en/function.filter-var-array.php
1645
   *
1646
   * @param array $data       <p>
1647
   *                          An array with string keys containing the data to filter.
1648
   *                          </p>
1649
   * @param mixed $definition [optional] <p>
1650
   *                          An array defining the arguments. A valid key is a string
1651
   *                          containing a variable name and a valid value is either a
1652
   *                          filter type, or an
1653
   *                          array optionally specifying the filter, flags and options.
1654 1
   *                          If the value is an array, valid keys are filter
1655
   *                          which specifies the filter type,
1656 1
   *                          flags which specifies any flags that apply to the
1657 1
   *                          filter, and options which specifies any options that
1658
   *                          apply to the filter. See the example below for a better understanding.
1659
   *                          </p>
1660 1
   *                          <p>
1661
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1662 1
   *                          input array are filtered by this filter.
1663 1
   *                          </p>
1664 1
   * @param bool  $add_empty  [optional] <p>
1665 1
   *                          Add missing keys as <b>NULL</b> to the return value.
1666 1
   *                          </p>
1667 1
   *
1668 1
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1669 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1670 1
   * the variable is not set.
1671 1
   * @since 5.2.0
1672 1
   */
1673 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults are used; as soon as a
    // definition was passed, both optional arguments are forwarded.
    $result = (func_num_args() >= 2)
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    // The native result is post-processed by self::filter() before it is returned.
    return self::filter($result);
  }
1683
1684
  /**
1685
   * Check if the number of unicode characters is not more than the specified integer.
1686
   *
1687
   * @param string $str      The original string to be checked.
1688
   * @param int    $box_size The size in number of chars to be checked against string.
1689
   *
1690
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1691
   */
1692 1
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $charCount = self::strlen($str);

    return ($box_size >= $charCount);
  }
1696
1697
  /**
1698
   * Try to fix simple broken UTF-8 strings.
1699
   *
1700
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1701
   *
1702
   * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
1703
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1704
   * See: http://en.wikipedia.org/wiki/Windows-1252
1705
   *
1706
   * @param string $str <p>The input string</p>
1707
   *
1708
   * @return string
1709
   */
1710 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // nothing to repair in an empty string
    if ($str === '') {
      return '';
    }

    // The search / replace lists are derived from self::$brokenUtf8ToUtf8
    // once and then re-used on every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1729
1730
  /**
1731
   * Fix a double (or multiple) encoded UTF8 string.
1732
   *
1733
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1734
   *
1735
   * @return mixed
1736
   */
1737
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element (recursively), keys are kept.
    if (is_array($str)) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // An empty string is already stable, no decoding round is needed.
    if ($str === '') {
      return $str;
    }

    // Decode and re-encode until a round no longer changes the value.
    do {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    } while ($previous !== $str);

    return $str;
  }
1759
1760
  /**
1761
   * Get the text direction of a specific character.
1762
   *
1763
   * @param string $char
1764
   *
1765
   * @return string <p>'RTL' or 'LTR'</p>
1766
   */
1767
  public static function getCharDirection($char)
  {
    // Run the one-time feature detection before self::$support is read.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes, taken from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the hard-coded tables below
    }

    $c = static::chr_to_decimal($char);

    // No entry of the tables below lies outside of 0x5be..0x10b7f.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Code points that are matched exactly (strict comparison).
    static $rtlSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive [first, last] code point ranges.
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1879
1880
  /**
1881
   * get data from "/data/*.php"
1882
   *
1883
   * @param string $file
1884
   *
1885
   * @return bool|string|array|int <p>Will return false on error.</p>
1886
   */
1887
  private static function getData($file)
  {
    // Data files live next to this class in "data/<name>.php".
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // The data file hands its payload back through its own "return" statement.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1897
1898
  /**
1899
   * alias for "UTF8::string_has_bom()"
1900
   *
1901
   * @see UTF8::string_has_bom()
1902
   *
1903
   * @param string $str
1904
   *
1905
   * @return bool
1906
   */
1907
  public static function hasBom($str)
  {
    // Pure alias: the check itself is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1911
1912
  /**
1913
   * Converts hexadecimal U+xxxx code point representation to integer.
1914
   *
1915
   * INFO: opposite to UTF8::int_to_hex()
1916
   *
1917
   * @param string $str <p>The hexadecimal code point representation.</p>
1918
   *
1919
   * @return int|false <p>The code point, or false on failure.</p>
1920
   */
1921
  public static function hex_to_int($str)
  {
    // Empty / falsy input can never hold a code point.
    if (!$str) {
      return false;
    }

    // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
    // Only [0-9a-f] is accepted here: the former [a-z0-9] class also let
    // non-hex input such as "zzzz" through, which intval() then silently
    // converted to 0 instead of reporting a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1933
1934 9
  /**
1935 7
   * alias for "UTF8::html_entity_decode()"
1936
   *
1937
   * @see UTF8::html_entity_decode()
1938
   *
1939 9
   * @param string $str
1940 9
   * @param int    $flags
1941
   * @param string $encoding
1942 9
   *
1943 9
   * @return string
1944 9
   */
1945 9
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Pure alias: all arguments are handed over unchanged.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1949
1950 9
  /**
1951 2
   * Converts a UTF-8 string to a series of HTML numbered entities.
1952 2
   *
1953
   * INFO: opposite to UTF8::html_decode()
1954 9
   *
1955 4
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
1956 4
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
1957 4
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
1958
   *
1959
   * @return string <p>HTML numbered entities.</p>
1960 4
   */
1961
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: everything, or only non-ASCII
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A convmap is a list of 4-tuples (start, end, offset, mask). The
      // former 5-element map was malformed (PHP 8 rejects it with a
      // ValueError) and ended at 0xfffff, so code points in
      // U+100000..U+10FFFF were left unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode the string char by char
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
1999
2000
  /**
2001
   * UTF-8 version of html_entity_decode()
2002
   *
2003
   * The reason we are not using html_entity_decode() by itself is because
2004
   * while it is not technically correct to leave out the semicolon
2005
   * at the end of an entity most browsers will still interpret the entity
2006
   * correctly. html_entity_decode() does not convert entities without
2007
   * semicolons, so we are left with our own little solution here. Bummer.
2008
   *
2009
   * Convert all HTML entities to their applicable characters
2010
   *
2011
   * INFO: opposite to UTF8::html_encode()
2012
   *
2013
   * @link http://php.net/manual/en/function.html-entity-decode.php
2014
   *
2015
   * @param string $str      <p>
2016
   *                         The input string.
2017
   *                         </p>
2018
   * @param int    $flags    [optional] <p>
2019
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2020
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2021
   *                         <table>
2022
   *                         Available <i>flags</i> constants
2023
   *                         <tr valign="top">
2024
   *                         <td>Constant Name</td>
2025
   *                         <td>Description</td>
2026
   *                         </tr>
2027
   *                         <tr valign="top">
2028
   *                         <td><b>ENT_COMPAT</b></td>
2029
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2030
   *                         </tr>
2031
   *                         <tr valign="top">
2032
   *                         <td><b>ENT_QUOTES</b></td>
2033
   *                         <td>Will convert both double and single quotes.</td>
2034
   *                         </tr>
2035
   *                         <tr valign="top">
2036
   *                         <td><b>ENT_NOQUOTES</b></td>
2037
   *                         <td>Will leave both double and single quotes unconverted.</td>
2038
   *                         </tr>
2039
   *                         <tr valign="top">
2040
   *                         <td><b>ENT_HTML401</b></td>
2041
   *                         <td>
2042
   *                         Handle code as HTML 4.01.
2043
   *                         </td>
2044
   *                         </tr>
2045
   *                         <tr valign="top">
2046
   *                         <td><b>ENT_XML1</b></td>
2047
   *                         <td>
2048
   *                         Handle code as XML 1.
2049
   *                         </td>
2050
   *                         </tr>
2051
   *                         <tr valign="top">
2052
   *                         <td><b>ENT_XHTML</b></td>
2053
   *                         <td>
2054
   *                         Handle code as XHTML.
2055
   *                         </td>
2056
   *                         </tr>
2057
   *                         <tr valign="top">
2058
   *                         <td><b>ENT_HTML5</b></td>
2059
   *                         <td>
2060
   *                         Handle code as HTML 5.
2061
   *                         </td>
2062
   *                         </tr>
2063
   *                         </table>
2064
   *                         </p>
2065
   * @param string $encoding [optional] <p>Encoding to use.</p>
2066
   *
2067
   * @return string <p>The decoded string.</p>
2068
   */
2069
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Strings shorter than four bytes are returned untouched (the empty string included).
    if (!isset($str[3])) {
      return $str;
    }

    // Quick exit: without "&", or without both "&#" and ";", nothing is decoded.
    $hasAmpersand = strpos($str, '&') !== false;
    $hasEntityMarker = strpos($str, '&#') !== false || strpos($str, ';') !== false;
    if (!$hasAmpersand || !$hasEntityMarker) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      // ENT_HTML5 is only used if the running PHP version is at least 5.4
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Converts one decimal entity, but keeps it as it is if it would
    // turn into a single or double quote.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // Repeat both passes until a round leaves the string unchanged.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // append the missing ";" to numeric entities, so that PHP can decode them
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      // decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2134
2135
  /**
2136
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2137
   *
2138
   * @link http://php.net/manual/en/function.htmlentities.php
2139
   *
2140
   * @param string $str           <p>
2141
   *                              The input string.
2142
   *                              </p>
2143
   * @param int    $flags         [optional] <p>
2144
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2145
   *                              invalid code unit sequences and the used document type. The default is
2146
   *                              ENT_COMPAT | ENT_HTML401.
2147
   *                              <table>
2148
   *                              Available <i>flags</i> constants
2149
   *                              <tr valign="top">
2150
   *                              <td>Constant Name</td>
2151
   *                              <td>Description</td>
2152
   *                              </tr>
2153
   *                              <tr valign="top">
2154
   *                              <td><b>ENT_COMPAT</b></td>
2155
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2156
   *                              </tr>
2157
   *                              <tr valign="top">
2158
   *                              <td><b>ENT_QUOTES</b></td>
2159
   *                              <td>Will convert both double and single quotes.</td>
2160
   *                              </tr>
2161
   *                              <tr valign="top">
2162
   *                              <td><b>ENT_NOQUOTES</b></td>
2163
   *                              <td>Will leave both double and single quotes unconverted.</td>
2164
   *                              </tr>
2165
   *                              <tr valign="top">
2166
   *                              <td><b>ENT_IGNORE</b></td>
2167
   *                              <td>
2168
   *                              Silently discard invalid code unit sequences instead of returning
2169
   *                              an empty string. Using this flag is discouraged as it
2170
   *                              may have security implications.
2171
   *                              </td>
2172
   *                              </tr>
2173
   *                              <tr valign="top">
2174
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2175
   *                              <td>
2176
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2177
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2178
   *                              </td>
2179
   *                              </tr>
2180
   *                              <tr valign="top">
2181
   *                              <td><b>ENT_DISALLOWED</b></td>
2182
   *                              <td>
2183
   *                              Replace invalid code points for the given document type with a
2184
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2185
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2186
   *                              instance, to ensure the well-formedness of XML documents with
2187
   *                              embedded external content.
2188
   *                              </td>
2189
   *                              </tr>
2190
   *                              <tr valign="top">
2191
   *                              <td><b>ENT_HTML401</b></td>
2192
   *                              <td>
2193
   *                              Handle code as HTML 4.01.
2194
   *                              </td>
2195
   *                              </tr>
2196
   *                              <tr valign="top">
2197
   *                              <td><b>ENT_XML1</b></td>
2198
   *                              <td>
2199
   *                              Handle code as XML 1.
2200
   *                              </td>
2201
   *                              </tr>
2202
   *                              <tr valign="top">
2203
   *                              <td><b>ENT_XHTML</b></td>
2204
   *                              <td>
2205
   *                              Handle code as XHTML.
2206
   *                              </td>
2207
   *                              </tr>
2208
   *                              <tr valign="top">
2209
   *                              <td><b>ENT_HTML5</b></td>
2210
   *                              <td>
2211
   *                              Handle code as HTML 5.
2212
   *                              </td>
2213
   *                              </tr>
2214
   *                              </table>
2215
   *                              </p>
2216
   * @param string $encoding      [optional] <p>
2217
   *                              Like <b>htmlspecialchars</b>,
2218
   *                              <b>htmlentities</b> takes an optional third argument
2219
   *                              <i>encoding</i> which defines encoding used in
2220
   *                              conversion.
2221
   *                              Although this argument is technically optional, you are highly
2222
   *                              encouraged to specify the correct value for your code.
2223
   *                              </p>
2224
   * @param bool   $double_encode [optional] <p>
2225
   *                              When <i>double_encode</i> is turned off PHP will not
2226
   *                              encode existing html entities. The default is to convert everything.
2227
   *                              </p>
2228
   *
2229
   *
2230
   * @return string the encoded string.
2231
   * </p>
2232 1
   * <p>
2233
   * If the input <i>string</i> contains an invalid code unit
2234 1
   * sequence within the given <i>encoding</i> an empty string
2235
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2236
   * <b>ENT_SUBSTITUTE</b> flags are set.
2237
   */
2238 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct char of three or more bytes together with
    // its numeric entity, then swap them all in one go.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2266 3
2267
  /**
2268 3
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2269
   *
2270
   * INFO: Take a look at "UTF8::htmlentities()"
2271
   *
2272
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2273
   *
2274
   * @param string $str           <p>
2275
   *                              The string being converted.
2276
   *                              </p>
2277
   * @param int    $flags         [optional] <p>
2278
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2279 1
   *                              invalid code unit sequences and the used document type. The default is
2280
   *                              ENT_COMPAT | ENT_HTML401.
2281 1
   *                              <table>
2282
   *                              Available <i>flags</i> constants
2283
   *                              <tr valign="top">
2284
   *                              <td>Constant Name</td>
2285
   *                              <td>Description</td>
2286
   *                              </tr>
2287
   *                              <tr valign="top">
2288
   *                              <td><b>ENT_COMPAT</b></td>
2289 2
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2290
   *                              </tr>
2291 2
   *                              <tr valign="top">
2292
   *                              <td><b>ENT_QUOTES</b></td>
2293
   *                              <td>Will convert both double and single quotes.</td>
2294
   *                              </tr>
2295
   *                              <tr valign="top">
2296
   *                              <td><b>ENT_NOQUOTES</b></td>
2297
   *                              <td>Will leave both double and single quotes unconverted.</td>
2298
   *                              </tr>
2299
   *                              <tr valign="top">
2300
   *                              <td><b>ENT_IGNORE</b></td>
2301
   *                              <td>
2302
   *                              Silently discard invalid code unit sequences instead of returning
2303 2
   *                              an empty string. Using this flag is discouraged as it
2304
   *                              may have security implications.
2305 2
   *                              </td>
2306
   *                              </tr>
2307
   *                              <tr valign="top">
2308
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2309
   *                              <td>
2310
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2311
   *                              U+FFFD (UTF-8) or &#38;#xFFFD; (otherwise) instead of returning an empty string.
2312
   *                              </td>
2313
   *                              </tr>
2314
   *                              <tr valign="top">
2315
   *                              <td><b>ENT_DISALLOWED</b></td>
2316
   *                              <td>
2317 1
   *                              Replace invalid code points for the given document type with a
2318
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#xFFFD;
2319 1
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2320
   *                              instance, to ensure the well-formedness of XML documents with
2321
   *                              embedded external content.
2322
   *                              </td>
2323
   *                              </tr>
2324
   *                              <tr valign="top">
2325
   *                              <td><b>ENT_HTML401</b></td>
2326
   *                              <td>
2327
   *                              Handle code as HTML 4.01.
2328
   *                              </td>
2329
   *                              </tr>
2330
   *                              <tr valign="top">
2331
   *                              <td><b>ENT_XML1</b></td>
2332
   *                              <td>
2333
   *                              Handle code as XML 1.
2334
   *                              </td>
2335
   *                              </tr>
2336
   *                              <tr valign="top">
2337
   *                              <td><b>ENT_XHTML</b></td>
2338
   *                              <td>
2339
   *                              Handle code as XHTML.
2340
   *                              </td>
2341
   *                              </tr>
2342
   *                              <tr valign="top">
2343
   *                              <td><b>ENT_HTML5</b></td>
2344
   *                              <td>
2345
   *                              Handle code as HTML 5.
2346
   *                              </td>
2347
   *                              </tr>
2348
   *                              </table>
2349
   *                              </p>
2350
   * @param string $encoding      [optional] <p>
2351
   *                              Defines encoding used in conversion.
2352
   *                              </p>
2353
   *                              <p>
2354
   *                              For the purposes of this function, the encodings
2355
   *                              ISO-8859-1, ISO-8859-15,
2356
   *                              UTF-8, cp866,
2357
   *                              cp1251, cp1252, and
2358
   *                              KOI8-R are effectively equivalent, provided the
2359 1
   *                              <i>string</i> itself is valid for the encoding, as
2360
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2361 1
   *                              the same positions in all of these encodings.
2362
   *                              </p>
2363
   * @param bool   $double_encode [optional] <p>
2364
   *                              When <i>double_encode</i> is turned off PHP will not
2365
   *                              encode existing html entities, the default is to convert everything.
2366
   *                              </p>
2367
   *
2368
   * @return string The converted string.
2369
   * </p>
2370
   * <p>
2371
   * If the input <i>string</i> contains an invalid code unit
2372
   * sequence within the given <i>encoding</i> an empty string
2373
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2374
   * <b>ENT_SUBSTITUTE</b> flags are set.
2375
   */
2376
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal 'UTF-8' is passed through untouched; any other label is normalized first.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2384
2385
  /**
2386
   * Checks whether iconv is available on the server.
2387 1
   *
2388
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2389 1
   */
2390
  public static function iconv_loaded()
  {
    // Report whether the "iconv" extension is loaded in this PHP process.
    if (extension_loaded('iconv')) {
      return true;
    }

    return false;
  }
2394
2395
  /**
2396
   * Converts Integer to hexadecimal U+xxxx code point representation.
2397
   *
2398
   * INFO: opposite to UTF8::hex_to_int()
2399
   *
2400
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2401 1
   * @param string $pfix [optional]
2402
   *
2403 1
   * @return string <p>The code point, or empty string on failure.</p>
2404
   */
2405
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Lower-case hex digits, left-padded with zeros to at least four places.
    $digits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2417
2418 16
  /**
2419
   * Checks whether intl-char is available on the server.
2420
   *
2421
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2422
   */
2423
  public static function intlChar_loaded()
  {
    // The class lookup is only attempted once the PHP version check has passed.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2427
2428
  /**
2429
   * Checks whether intl is available on the server.
2430
   *
2431 28
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2432
   */
2433 28
  public static function intl_loaded()
  {
    // Report whether the "intl" extension is loaded in this PHP process.
    if (extension_loaded('intl')) {
      return true;
    }

    return false;
  }
2437
2438
  /**
2439 28
   * alias for "UTF8::is_ascii()"
2440
   *
2441
   * @see UTF8::is_ascii()
2442
   *
2443
   * @param string $str
2444
   *
2445
   * @return boolean
2446
   */
2447
  public static function isAscii($str)
  {
    // camelCase alias: the work is done by is_ascii(), whose result is returned unchanged.
    $result = self::is_ascii($str);

    return $result;
  }
2451 1
2452
  /**
2453 1
   * alias for "UTF8::is_base64()"
2454 1
   *
2455
   * @see UTF8::is_base64()
2456
   *
2457 1
   * @param string $str
2458 1
   *
2459
   * @return bool
2460 1
   */
2461
  public static function isBase64($str)
  {
    // camelCase alias: the work is done by is_base64(), whose result is returned unchanged.
    $result = self::is_base64($str);

    return $result;
  }
2465
2466
  /**
2467
   * alias for "UTF8::is_binary()"
2468
   *
2469
   * @see UTF8::is_binary()
2470
   *
2471 16
   * @param string $str
2472
   *
2473
   * @return bool
2474 16
   */
2475
  public static function isBinary($str)
  {
    // camelCase alias: the work is done by is_binary(), whose result is returned unchanged.
    $result = self::is_binary($str);

    return $result;
  }
2479 16
2480 16
  /**
2481 15
   * alias for "UTF8::is_bom()"
2482 16
   *
2483 6
   * @see UTF8::is_bom()
2484
   *
2485 15
   * @param string $utf8_chr
2486
   *
2487
   * @return boolean
2488
   */
2489
  public static function isBom($utf8_chr)
  {
    // camelCase alias: the work is done by is_bom(), whose result is returned unchanged.
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2493
2494
  /**
2495
   * alias for "UTF8::is_html()"
2496
   *
2497
   * @see UTF8::is_html()
2498
   *
2499
   * @param string $str
2500
   *
2501
   * @return boolean
2502
   */
2503
  public static function isHtml($str)
  {
    // camelCase alias: the work is done by is_html(), whose result is returned unchanged.
    $result = self::is_html($str);

    return $result;
  }
2507
2508
  /**
2509
   * alias for "UTF8::is_json()"
2510
   *
2511
   * @see UTF8::is_json()
2512
   *
2513
   * @param string $str
2514
   *
2515
   * @return bool
2516
   */
2517
  public static function isJson($str)
  {
    // camelCase alias: the work is done by is_json(), whose result is returned unchanged.
    $result = self::is_json($str);

    return $result;
  }
2521
2522
  /**
2523
   * alias for "UTF8::is_utf16()"
2524
   *
2525
   * @see UTF8::is_utf16()
2526
   *
2527
   * @param string $str
2528
   *
2529
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2530
   */
2531
  public static function isUtf16($str)
  {
    // camelCase alias: the work is done by is_utf16(), whose result is returned unchanged.
    $result = self::is_utf16($str);

    return $result;
  }
2535
2536 1
  /**
2537
   * alias for "UTF8::is_utf32()"
2538 1
   *
2539
   * @see UTF8::is_utf32()
2540 1
   *
2541
   * @param string $str
2542
   *
2543
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2544
   */
2545 1
  public static function isUtf32($str)
  {
    // camelCase alias: the work is done by is_utf32(), whose result is returned unchanged.
    $result = self::is_utf32($str);

    return $result;
  }
2549 1
2550 1
  /**
2551
   * alias for "UTF8::is_utf8()"
2552 1
   *
2553
   * @see UTF8::is_utf8()
2554
   *
2555
   * @param string $str
2556
   * @param bool   $strict
2557
   *
2558
   * @return bool
2559
   */
2560
  public static function isUtf8($str, $strict = false)
  {
    // camelCase alias: both arguments are forwarded to is_utf8() and its result is returned unchanged.
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2564
2565 1
  /**
2566
   * Checks if a string is 7 bit ASCII.
2567 1
   *
2568
   * @param string $str <p>The string to check.</p>
2569
   *
2570
   * @return bool <p>
2571
   *              <strong>true</strong> if it is ASCII<br />
2572 1
   *              <strong>false</strong> otherwise
2573 1
   *              </p>
2574 1
   */
2575 1
  public static function is_ascii($str)
  {
    $text = (string)$str;

    // Nothing to inspect: the empty string counts as ASCII.
    if ($text === '') {
      return true;
    }

    // A single byte in the range 0x80-0xFF disqualifies the string.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $text);

    return !$hasHighByte;
  }
2585
2586
  /**
2587
   * Returns true if the string is base64 encoded, false otherwise.
2588
   *
2589
   * @param string $str <p>The input string.</p>
2590
   *
2591
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2592
   */
2593 4
  public static function is_base64($str)
  {
    $text = (string)$str;

    // The empty string is never reported as base64.
    if ($text === '') {
      return false;
    }

    // Strictly decode, encode again, and require the exact input to come back.
    $roundTrip = base64_encode(base64_decode($text, true));

    return $roundTrip === $text;
  }
2607 4
2608 2
  /**
2609 2
   * Check if the input is binary (heuristic; this looks like a hack).
2610 4
   *
2611 4
   * @param mixed $input
2612 4
   *
2613
   * @return bool
2614 4
   */
2615 4
  public static function is_binary($input)
  {
    // $input is documented as mixed, but every check below is a string
    // function. Cast once up front (as the other is_*() checks in this class
    // do) so null/false/int input is handled without type warnings.
    $input = (string)$input;
    $testLength = strlen($input);

    // A string made up solely of the characters "0" and "1" counts as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte counts as binary.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // NOTE(review): substr_count() takes a literal needle, so this counts
    // occurrences of the four-character string "^ -~" and compares that to
    // 30% of the length. It looks like a character class was intended, but
    // the result is kept as-is because other detectors call this method.
    return $testLength > 0 && substr_count($input, '^ -~') / $testLength > 0.3;
  }
2632
2633 3
  /**
2634
   * Check if the file is binary.
2635
   *
2636
   * @param string $file
2637 3
   *
2638
   * @return boolean
2639 3
   */
2640
  public static function is_binary_file($file)
  {
    // Fallback when the file cannot be opened or read: an empty block.
    $block = '';

    try {
      // 'rb' so the bytes are read untranslated on every platform.
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle has to be checked before it is passed to fread()/fclose().
      if ($fp !== false) {
        $data = fread($fp, 512);
        fclose($fp);

        // fread() can also return false; keep the empty fallback in that case.
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Still relevant when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Only the first 512 bytes (at most) are inspected.
    return self::is_binary($block);
  }
2652
2653 3
  /**
2654
   * Checks if the given string is equal to any "Byte Order Mark".
2655 3
   *
2656
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2657 3
   *
2658
   * @param string $str <p>The input string.</p>
2659 3
   *
2660 3
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2661 3
   */
2662 3
  public static function is_bom($str)
  {
    // The keys of self::$bom are the candidate strings; the values are not needed here.
    foreach (self::$bom as $marker => $unusedLength) {
      if ($marker !== $str) {
        continue;
      }

      return true;
    }

    return false;
  }
2672 3
2673
  /**
2674 3
   * Check if the string contains any html-tags <lall>.
2675 3
   *
2676 3
   * @param string $str <p>The input string.</p>
2677 3
   *
2678 3
   * @return boolean
2679 3
   */
2680 3
  public static function is_html($str)
  {
    $text = (string)$str;

    // The empty string contains no tags.
    if ($text === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, with or without attributes.
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $text, $found);

    return count($found) !== 0;
  }
2699 3
2700
  /**
2701
   * Try to check if "$str" is a JSON string.
2702
   *
2703
   * @param string $str <p>The input string.</p>
2704
   *
2705
   * @return bool
2706
   */
2707
  public static function is_json($str)
  {
    $str = (string)$str;

    // The empty string is not JSON.
    if (!isset($str[0])) {
      return false;
    }

    // Decoded with the default $assoc = false: a top-level JSON object comes
    // back as an object and a top-level JSON array as a PHP array.
    $decoded = self::json_decode($str);

    // Both containers are accepted (previously a top-level array was
    // rejected); bare scalars and null are still not reported as JSON.
    return (is_object($decoded) || is_array($decoded)) && json_last_error() === JSON_ERROR_NONE;
  }
2725
2726
  /**
2727
   * Check if the string is UTF-16.
2728
   *
2729
   * @param string $str <p>The input string.</p>
2730 41
   *
2731
   * @return int|false <p>
2732
   *                   <strong>false</strong> if it's not UTF-16,<br />
2733
   *                   <strong>1</strong> for UTF-16LE,<br />
2734
   *                   <strong>2</strong> for UTF-16BE.
2735
   *                   </p>
2736
   */
2737 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // Score both byte orders in the same way, little-endian first.
    $hits = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $hits[$byteOrder] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$decoded) {
        continue;
      }

      // The decoded text must survive a round trip through this byte order.
      $reEncoded = \mb_convert_encoding($decoded, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // One point for every key of count_chars($roundTrip) that is found
      // (strictly) among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $candidate => $unused) {
        if (in_array($candidate, $strChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
      return false;
    }

    return ($hits['UTF-16LE'] > $hits['UTF-16BE']) ? 1 : 2;
  }
2785 3
2786 9
  /**
2787
   * Check if the string is UTF-32.
2788 3
   *
2789 3
   * @param string $str
2790 3
   *
2791 3
   * @return int|false <p>
2792 3
   *                   <strong>false</strong> if it's not UTF-32,<br />
2793
   *                   <strong>1</strong> for UTF-32LE,<br />
2794
   *                   <strong>2</strong> for UTF-32BE.
2795
   *                   </p>
2796 5
   */
2797 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    // Score both byte orders in the same way, little-endian first.
    $hits = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $hits[$byteOrder] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$decoded) {
        continue;
      }

      // The decoded text must survive a round trip through this byte order.
      $reEncoded = \mb_convert_encoding($decoded, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // One point for every key of count_chars($roundTrip) that is found
      // (strictly) among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $candidate => $unused) {
        if (in_array($candidate, $strChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) is inconclusive.
    if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
      return false;
    }

    return ($hits['UTF-32LE'] > $hits['UTF-32BE']) ? 1 : 2;
  }
2845
2846
  /**
2847
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2848
   *
2849
   * @see    http://hsivonen.iki.fi/php-utf8/
2850
   *
2851
   * @param string $str    <p>The string to be checked.</p>
2852
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2853
   *
2854
   * @return bool
2855
   */
2856
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is treated as valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: anything detected as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      //
      // NOTE(review): this branch runs when pcre_utf8_support() is NOT true,
      // yet it depends on the /u modifier - confirm that the condition is not
      // inverted.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // At a sequence boundary: expect either a US-ASCII octet or the
        // first octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. This is always illegal, but
          // rather than resynchronizing here the sequence is consumed and
          // rejected by the "4 < $mBytes" check once it is complete.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the six payload bits into their position in the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 now holds the code point. Reject illegal
        // sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next sequence.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // If continuation octets are still outstanding, the input ended in the
    // middle of a multi-octet sequence, which is not valid UTF-8.
    return $mState === 0;
  }
2990 1
2991
  /**
2992 1
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2993
   * Decodes a JSON string
2994
   *
2995
   * @link http://php.net/manual/en/function.json-decode.php
2996 1
   *
2997
   * @param string $json    <p>
2998
   *                        The <i>json</i> string being decoded.
2999
   *                        </p>
3000
   *                        <p>
3001
   *                        This function only works with UTF-8 encoded strings.
3002
   *                        </p>
3003
   *                        <p>PHP implements a superset of
3004
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3005
   *                        only supports these values when they are nested inside an array or an object.
3006
   *                        </p>
3007 1
   * @param bool   $assoc   [optional] <p>
3008
   *                        When <b>TRUE</b>, returned objects will be converted into
3009 1
   *                        associative arrays.
3010 1
   *                        </p>
3011 1
   * @param int    $depth   [optional] <p>
3012
   *                        User specified recursion depth.
3013 1
   *                        </p>
3014
   * @param int    $options [optional] <p>
3015
   *                        Bitmask of JSON decode options. Currently only
3016
   *                        <b>JSON_BIGINT_AS_STRING</b>
3017
   *                        is supported (default is to cast large integers as floats)
3018
   *                        </p>
3019
   *
3020
   * @return mixed the value encoded in <i>json</i> in appropriate
3021
   * PHP type. Values true, false and
3022 2
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3023
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3024 2
   * <i>json</i> cannot be decoded or if the encoded
3025
   * data is deeper than the recursion limit.
3026 2
   */
3027 2
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is run through self::filter() before it is decoded.
    $filtered = self::filter($json);

    // $options is only forwarded when the PHP 5.4 version check passes.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3039
3040 1
  /**
   * Returns the JSON representation of a value (wrapper around the native "json_encode()").
   *
   * The value is passed through self::filter() before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of the JSON_* constants, e.g. <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Maximum nesting depth, must be greater than zero.
   *                       Only forwarded to the native function on PHP >= 5.5.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filteredValue = self::filter($value);

    // The "$depth" argument of the native json_encode() exists since PHP 5.5.
    return Bootup::is_php('5.5')
        ? json_encode($filteredValue, $options, $depth)
        : json_encode($filteredValue, $options);
  }
3088
3089
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string, or an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // Nothing to lowercase: avoids feeding a possible "false" from self::substr() into self::strtolower().
    if (!isset($str[0])) {
      return '';
    }

    // self::substr() may return false (e.g. for a one-character string there is no "rest"),
    // so both parts are cast to string before use.
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3100 4
3101 4
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), Unicode
   *                      separators and control characters (\pZ, \pC) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "!$chars" alone would also be true for the character list "0", which is a perfectly
    // valid request ("strip leading zeros"), so "0" is excluded from the "not given" case.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3124
3125
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function max($arg)
  {
    // An array of strings is treated as one concatenated string.
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native max() must not be called with an empty list
    // (warning on older PHP versions, ValueError on PHP 8).
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3140
3141
  /**
   * Determines the largest byte width of any single UTF-8 character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>The maximum byte length found, or 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $byteWidths = self::chr_size_list($str);

    return count($byteWidths) > 0 ? (int)max($byteWidths) : 0;
  }
3158
3159 2
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3174 18
3175 18
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function min($arg)
  {
    // An array of strings is treated as one concatenated string.
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native min() must not be called with an empty list
    // (warning on older PHP versions, ValueError on PHP 8).
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3190
3191
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string|false <p>Whatever "UTF8::normalize_encoding()" returns for the given name.</p>
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3204
3205
  /**
   * Normalize the encoding-"name" input.
   *
   * Names that are already "UTF-8" or listed in self::$iconvEncoding are returned untouched.
   * Everything else is upper-cased, reduced to letters / digits / whitespace and looked up in a
   * small alias table; unknown names are returned in their upper-cased form.
   * Results are memoized per original input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      <strong>false</strong> if the given encoding is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    static $resolvedNames = array();

    if (!$encoding) {
      return false;
    }

    // fast path: nothing to normalize
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resolvedNames[$encoding])) {
      return $resolvedNames[$encoding];
    }

    $upperName = strtoupper($encoding);
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = empty($aliases[$aliasKey]) ? $upperName : $aliases[$aliasKey];

    // remember the result under the name the caller actually passed in
    $resolvedNames[$encoding] = $normalized;

    return $normalized;
  }
3261 9
3262 4
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are built once per request from self::$utf8MSWord
    static $searchList = null;
    static $replaceList = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$utf8MSWord);
      $replaceList = array_values(self::$utf8MSWord);
    }

    return str_replace($searchList, $replaceList, $str);
  }
3288 1
3289
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable found in the string is replaced by a plain space and,
   * unless requested otherwise, every entry of self::$bidiUniCodeControlsTable is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The cache key must be derived from the very same strict check that decides whether the
    // non-breaking space is removed from the table. Otherwise a truthy non-bool value (e.g. 1)
    // would store the full table under the key that is later used for "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3335 1
3336 1
  /**
   * Format a number with grouped thousands.
   *
   * Separators that are longer than one byte (e.g. UTF-8 characters) are supported on every
   * PHP version: the number is formatted with the ASCII defaults first and the separators are
   * swapped in afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // At least one separator is longer than one byte.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() replaces both placeholders in a single pass, so a decimal separator that
      // itself contains "," (or a thousands separator containing ".") is never replaced twice.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3373
3374
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Input in another encoding is converted to UTF-8 first. "IntlChar::ord()" is used when
   * self::$support reports it as available and it yields a non-zero result; otherwise the
   * code point is computed from the first (up to four) bytes and memoized per input.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // static cache, only consulted when "IntlChar" did not produce a result
    static $cache = array();

    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', self::normalize_encoding($encoding));
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $intlResult = \IntlChar::ord($chr);
      if ($intlResult) {
        return $intlResult;
      }
    }

    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    // 1-based list of the first (max.) four byte values
    $bytes = unpack('C*', substr($chr, 0, 4));
    $leadByte = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $leadByte && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $leadByte && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $leadByte && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($leadByte - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or an incomplete sequence): the raw byte value
      $codePoint = $leadByte;
    }

    $cache[$chr] = $codePoint;

    return $codePoint;
  }
3434 45
3435
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str    <p>The input string.</p>
   * @param array  $result <p>The result will be returned into this reference parameter.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or the $result is empty.</p>
   */
  public static function parse_str($str, &$result)
  {
    // the input goes through self::clean() before it is parsed
    $cleanedStr = self::clean($str);

    $parsed = \mb_parse_str($cleanedStr, $result);

    return $parsed !== false && !empty($result);
  }
3460
3461 45
  /**
   * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // An empty pattern matches the empty subject (result: 1) when the modifier is supported.
    // The warning for an unsupported modifier is silenced on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return $matched ? true : false;
  }
3471
3472
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits -> decimal code point,
   * hexadecimal digits -> hexadecimal code point, anything else -> code point of the character.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolves one boundary of "UTF8::range()" to a code point.
   *
   * @param mixed $boundary <p>Decimal or hexadecimal code point, or a UTF-8 character.</p>
   *
   * @return int <p>The code point.</p>
   */
  private static function rangeBoundaryToCodePoint($boundary)
  {
    // The ctype_*() functions must only be fed with strings
    // (passing other types is deprecated as of PHP 8.1).
    $boundary = (string)$boundary;

    if (ctype_digit($boundary)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($boundary)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
3518 17
3519 17
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3532 18
3533 18
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM string => byte length) that the string starts with is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // substr() returns false instead of "" on older PHP versions when nothing is left
        // after the BOM; the cast keeps the documented string return type.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3550
3551
  /**
   * Collapses consecutive repetitions of a string inside another string into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String(s) whose consecutive repetitions are collapsed.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    $needles = is_string($what) ? array($what) : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (!is_array($needles)) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
3574 1
3575
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url-encoded form (e.g. "%0B") of these characters.</p>
   * @param string $replacement <p>The replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // Percent-encoding is case-insensitive ("%0b" === "%0B"), so the patterns must be, too;
      // otherwise the upper-case form slips through the filter.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced anymore: removing one sequence can join the
    // surrounding characters into a new one (e.g. "%%0b0b").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3608 36
3609 36
  /**
   * Replace the diamond question mark (U+FFFD, the Unicode replacement character) with the replacement.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>The replacement for every diamond question mark.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // both spellings (escaped bytes and literal character) share the same replacement
    $diamonds = array(
        "\xEF\xBF\xBD",
        '�',
    );

    return str_replace($diamonds, $unknown, $str);
  }
3631
3632
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. If omitted (or empty), Unicode
   *                      separators and control characters (\pZ, \pC) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "!$chars" alone would also be true for the character list "0", which is a perfectly
    // valid request ("strip trailing zeros"), so "0" is excluded from the "not given" case.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3655
3656
  /**
   * Builds a regex fragment that matches any one of the characters of the given string.
   *
   * Short pieces (at most two bytes) are quoted and collected in a bracket expression, a "-" is
   * moved to the front of it, and longer pieces are either appended to the bracket expression
   * (if self::strlen() reports one character) or added as separate alternatives.
   * Results are memoized per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>Either a bracket expression or a non-capturing group of alternatives.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0: content of the bracket expression, further entries: stand-alone alternatives
    $alternatives = array($class);

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // a leading "-" is taken literally inside a bracket expression
        $alternatives[0] = '-' . $alternatives[0];
        continue;
      }

      if (!isset($piece[2])) {
        $alternatives[0] .= preg_quote($piece, '/');
        continue;
      }

      if (1 === self::strlen($piece)) {
        $alternatives[0] .= $piece;
        continue;
      }

      $alternatives[] = $piece;
    }

    if ($alternatives[0]) {
      $alternatives[0] = '[' . $alternatives[0] . ']';
    }

    $pattern = (1 === count($alternatives))
        ? $alternatives[0]
        : '(?:' . implode('|', $alternatives) . ')';

    $rxClassCache[$cacheKey] = $pattern;

    return $pattern;
  }
3704
3705
  /**
   * WARNING: Echoes the values of the native UTF8-support table, e.g. for debugging.
   *
   * The support check is triggered first if it has not run yet, so the output does not
   * depend on whether another method of this class was called before.
   */
  public static function showSupport()
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    foreach (self::$support as $utf8Support) {
      echo $utf8Support . "\n<br>";
    }
  }
3714 6
3715 6
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is handed back unchanged on request
    if ($keepAsciiChars === true && self::isAscii($char) === true) {
      return $char;
    }

    $sourceEncoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return '&#' . self::ord($char, $sourceEncoding) . ';';
  }
3747
3748
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support (as reported by self::$support) the string is split via regex;
   * otherwise a byte-wise fallback collects well-formed 1- to 4-byte sequences and silently
   * skips everything else.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>
   *                           Clean non UTF-8 chars from the string
   *                           (only applied on the regex path).
   *                           </p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and collect complete sequences

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = substr($str, $pos, 2);

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = substr($str, $pos, 3);

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = substr($str, $pos, 4);

            $pos += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // glue the single characters back together in groups of "$length"
      return array_map(
          function ($chunk) {
            return implode('', $chunk);
          },
          array_chunk($chars, $length)
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3832 1
3833
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32 detection, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an "iconv()"
   * round-trip over the encodings listed in self::$iconvEncoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // run every detector only once and reuse its result for the LE / BE decision
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding is accepted when converting the input to itself leaves it unchanged;
    // both sides are cast to string so a failed conversion (false) compares like ''
    $strAsString = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@iconv($encodingTmp, $encodingTmp, $str) === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
3925
3926
  /**
   * Check if the string ends with the given substring.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many trailing characters as the needle is long and compare them
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
3949 12
3950
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    $tail = self::substr($haystack, -self::strlen($needle));
    if ($tail === false) {
      // no usable substring: never hand "false" to the comparison below
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
3973
3974
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * INFO: search and replacement are both treated as literal text; "$" and "\"
   *       in the replacement are not interpreted as back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The needle or an array of needles. Every replacement with
   *                       a search array is performed on the result of the previous
   *                       replacement. An empty needle never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement or an array of replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive unicode pattern per needle
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // pattern that can never match, so an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$" so that "preg_replace()" inserts the replacement literally
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4017 20
4018
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix when its first case-insensitive hit is at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4041
4042
  /**
   * Limit the number of characters in a string without cutting in the middle of a word.
   *
   * The string is cut to $length characters and the last space-separated word of that
   * cut is dropped. If nothing is left, the string is hard-cut to $length - 1 characters.
   * $strAddOn is appended whenever the input was shortened.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Maximum number of characters to keep.</p>
   * @param string $strAddOn <p>Suffix for a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $length = (int)$length;
    if (self::strlen($str) <= $length) {
      // short enough: return unchanged, without the add-on
      return $str;
    }

    // the limit falls directly behind a word: just drop that space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = self::substr($str, 0, $length);

    // remove the last (possibly incomplete) word
    $words = explode(' ', $cut);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    return self::substr($cut, 0, $length - 1) . $strAddOn;
  }
4082
4083 1
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not
   * positive, is smaller than the length of the input, or when $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with, and the divisions below must not divide by zero
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // an odd difference puts the extra character on the right side
        $left = (int)floor($diff / 2);
        $right = $diff - $left;

        $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
        $pre = self::substr($pre, 0, $left);
        $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
        $post = self::substr($post, 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4137
4138
  /**
   * Repeat a string.
   *
   * INFO: the input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4162
4163
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // "$count" is filled by the native function through the reference
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4196
4197 7
  /**
   * Shuffles all the characters in the string.
   *
   * The string is broken up with "UTF8::split()", the pieces are shuffled
   * and joined again.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4212
4213
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses repeated code points into one entry
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4238 2
4239 2
  /**
   * Split a string into an array.
   *
   * The string is cut into the pieces matched by Grapheme::GRAPHEME_CLUSTER_RX;
   * $len consecutive pieces are joined into one array element.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of pieces per array element.</p>
   *
   * @return array <p>An empty array for an empty input string.</p>
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $str = (string)$str;
    $len = (int)$len;

    if ($str === '') {
      return array();
    }

    if ($len < 1) {
      // hand an invalid length over to the native "str_split()"
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // every $len-th piece opens a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4283
4284
  /**
   * Check if the string starts with the given substring.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix when its first hit is at position 0
    return self::strpos($haystack, $needle) === 0;
  }
4307
4308
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written
   * in base 2 without leading zeros, e.g. "a" (0x61) => "1100001".
   *
   * INFO: the conversion is done byte by byte, so the result stays exact for
   *       inputs of any length.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1"; "0" for an empty or all-zero input.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      // eight bits per byte, zero-padded on the left
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // drop the leading zeros like a plain base conversion does
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4323 7
4324 1
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4339
4340
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>
   *                   The number of words in the string, or the words themselves for format 1 and 2.<br />
   *                   If the string can not be split, the result is 0 or an empty array.
   *                   </p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($strParts === false) {
      // "preg_split()" failed (e.g. on invalid UTF-8): report "no words"
      return ($format === 1 || $format === 2) ? array() : 0;
    }

    // the captured words sit on the odd indexes, the text between them on the even ones
    $len = count($strParts);

    if ($format === 1) {

      $numberOfWords = array();
      for ($i = 1; $i < $len; $i += 2) {
        $numberOfWords[] = $strParts[$i];
      }

    } elseif ($format === 2) {

      $numberOfWords = array();
      // offsets are counted in characters, starting behind the leading non-word text
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $numberOfWords[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

    } else {

      $numberOfWords = ($len - 1) / 2;

    }

    return $numberOfWords;
  }
4384
4385
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed
   *       through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4403
4404
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4421
4422
  /**
   * Case-sensitive string comparison.
   *
   * Byte-identical strings are equal; otherwise both strings are normalized to
   * NFD and compared with the native "strcmp()". A string that can not be
   * normalized is compared in its original form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    $str1 = (string)$str1;
    $str2 = (string)$str2;

    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "normalize()" gives false on failure; without the fallback two different
    // strings that both fail would be compared as '' and reported as equal
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
4442
4443
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start position of the examined part of the string.</p>
   * @param int    $length   <p>Length of the examined part of the string.</p>
   *
   * @return int|null <p>
   *                  The number of characters in front of the first character from $charList,<br />
   *                  or null if $charList or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when the caller asked for a sub-range
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first character of the mask
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no character of the mask found: the whole string is the segment
    return self::strlen($str);
  }
4475
4476
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4493
4494
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every element is converted with UTF8::chr() and the results are joined
   * in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4516
4517
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The keys of self::$bom are the byte sequences that are tested as prefix.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    $str = (string)$str;

    foreach (array_keys(self::$bom) as $bomString) {
      $bomString = (string)$bomString;

      // compare only the leading bytes instead of searching the whole string
      if (strncmp($str, $bomString, strlen($bomString)) === 0) {
        return true;
      }
    }

    return false;
  }
4534
4535 15
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first, then let the native function remove the tags
    $cleaned = self::clean($str);

    return \strip_tags($cleaned, $allowable_tags);
  }
4561
4562
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * INFO: an empty haystack or an empty needle gives false.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching, null is handled like 0
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // "\mb_stripos()" expects an integer offset, so the null default becomes 0
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4614
4615
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * INFO: the search is done by "\mb_stristr()", an empty needle gives false.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    // every encoding name except the default one is normalized first
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_stristr($haystack, $needle, $before_needle, $encoding);
  }
4648 2
4649
  /**
4650
   * Get the string length, not the byte-length!
4651 3
   *
4652
   * @link     http://php.net/manual/en/function.mb-strlen.php
4653
   *
4654
   * @param string  $str       <p>The string being checked for length.</p>
4655
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4656
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4657
   *
4658
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4659
   *             character counted as +1)</p>
4660
   */
4661
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
4662
  {
4663
    $str = (string)$str;
4664
4665
    if (!isset($str[0])) {
4666
      return 0;
4667
    }
4668
4669 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4670
        $encoding === 'UTF-8'
4671
        ||
4672
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
4673
    ) {
4674
      $encoding = 'UTF-8';
4675
    } else {
4676
      $encoding = self::normalize_encoding($encoding);
4677 1
    }
4678
4679 1
    switch ($encoding) {
4680 1
      case 'ASCII':
4681 1
      case 'CP850':
4682
        return strlen($str);
4683 1
    }
4684
4685
    if ($cleanUtf8 === true) {
4686
      $str = self::clean($str);
4687
    }
4688
4689
    return \mb_strlen($str, $encoding);
4690 1
  }
4691
4692
  /**
4693
   * Case insensitive string comparisons using a "natural order" algorithm.
4694
   *
4695
   * INFO: natural order version of UTF8::strcasecmp()
4696
   *
4697
   * @param string $str1 <p>The first string.</p>
4698
   * @param string $str2 <p>The second string.</p>
4699
   *
4700
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
4701
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
4702
   *             <strong>0</strong> if they are equal
4703
   */
4704
  public static function strnatcasecmp($str1, $str2)
4705
  {
4706
    return self::strnatcmp(self::strtocasefold($str1), self::strtocasefold($str2));
4707 1
  }
4708
4709 1
  /**
4710
   * String comparisons using a "natural order" algorithm
4711
   *
4712
   * INFO: natural order version of UTF8::strcmp()
4713
   *
4714
   * @link  http://php.net/manual/en/function.strnatcmp.php
4715
   *
4716
   * @param string $str1 <p>The first string.</p>
4717
   * @param string $str2 <p>The second string.</p>
4718
   *
4719
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
4720
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
4721
   *             <strong>0</strong> if they are equal
4722
   */
4723
  public static function strnatcmp($str1, $str2)
4724
  {
4725
    return $str1 . '' === $str2 . '' ? 0 : strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
4726
  }
4727
4728
  /**
4729 11
   * Case-insensitive string comparison of the first n characters.
4730
   *
4731 11
   * @link  http://php.net/manual/en/function.strncasecmp.php
4732
   *
4733 11
   * @param string $str1 <p>The first string.</p>
4734 2
   * @param string $str2 <p>The second string.</p>
4735 2
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4736
   *
4737 11
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4738
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4739 11
   *             <strong>0</strong> if they are equal
4740 2
   */
4741
  public static function strncasecmp($str1, $str2, $len)
4742
  {
4743
    return self::strncmp(self::strtocasefold($str1), self::strtocasefold($str2), $len);
4744 10
  }
4745 10
4746
  /**
4747
   * String comparison of the first n characters.
4748
   *
4749 10
   * @link  http://php.net/manual/en/function.strncmp.php
4750
   *
4751 10
   * @param string $str1 <p>The first string.</p>
4752
   * @param string $str2 <p>The second string.</p>
4753
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4754 3
   *
4755 3
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4756 3
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4757
   *             <strong>0</strong> if they are equal
4758 10
   */
4759
  public static function strncmp($str1, $str2, $len)
4760
  {
4761
    $str1 = self::substr($str1, 0, $len);
4762
    $str2 = self::substr($str2, 0, $len);
4763
4764 10
    return self::strcmp($str1, $str2);
0 ignored issues
show
Security Bug introduced by
It seems like $str1 defined by self::substr($str1, 0, $len) on line 4761 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str2 defined by self::substr($str2, 0, $len) on line 4762 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
4765 1
  }
4766 10
4767 10
  /**
4768 10
   * Search a string for any of a set of characters.
4769 1
   *
4770
   * @link  http://php.net/manual/en/function.strpbrk.php
4771
   *
4772
   * @param string $haystack  <p>The string where char_list is looked for.</p>
4773
   * @param string $char_list <p>This parameter is case sensitive.</p>
4774 10
   *
4775 10
   * @return string|false String starting from the character found, or false if it is not found.
4776 10
   */
4777 10
  public static function strpbrk($haystack, $char_list)
4778
  {
4779
    $haystack = (string)$haystack;
4780
    $char_list = (string)$char_list;
4781
4782
    if (!isset($haystack[0], $char_list[0])) {
4783
      return false;
4784
    }
4785
4786
    if (preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
4787
      return substr($haystack, strpos($haystack, $m[0]));
4788
    } else {
4789
      return false;
4790
    }
4791
  }
4792
4793
  /**
4794
   * Find position of first occurrence of string in a string.
4795
   *
4796
   * @link http://php.net/manual/en/function.mb-strpos.php
4797
   *
4798
   * @param string  $haystack  <p>The string being checked.</p>
4799
   * @param string  $needle    <p>The string to search for in haystack.</p>
4800
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
4801
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4802
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4803
   *
4804
   * @return int|false <p>
4805
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
4806
   *                   If needle is not found it returns false.
4807
   *                   </p>
4808
   */
4809
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
4810
  {
4811
    $haystack = (string)$haystack;
4812
    $needle = (string)$needle;
4813 10
4814
    if (!isset($haystack[0], $needle[0])) {
4815
      return false;
4816 10
    }
4817 10
4818
    // init
4819 10
    $offset = (int)$offset;
4820 2
4821 2
    // iconv and mbstring do not support integer $needle
4822
4823 10
    if (((int)$needle) === $needle && ($needle >= 0)) {
0 ignored issues
show
Unused Code Bug introduced by
The strict comparison === seems to always evaluate to false as the types of (int) $needle (integer) and $needle (string) can never be identical. Maybe you want to use a loose comparison == instead?
Loading history...
4824 10
      $needle = (string)self::chr($needle);
4825 2
    }
4826
4827
    if ($cleanUtf8 === true) {
4828 8
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
4829
      // if invalid characters are found in $haystack before $needle
4830
      $needle = self::clean($needle);
4831
      $haystack = self::clean($haystack);
4832
    }
4833
4834
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
4835
      self::checkForSupport();
4836
    }
4837
4838 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4839
        $encoding === 'UTF-8'
4840
        ||
4841
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
4842
    ) {
4843
      $encoding = 'UTF-8';
4844
    } else {
4845 2
      $encoding = self::normalize_encoding($encoding);
4846
    }
4847 2
4848
    if (self::$support['mbstring'] === true) {
4849
      return \mb_strpos($haystack, $needle, $offset, $encoding);
4850
    }
4851
4852
    if (self::$support['iconv'] === true) {
4853
      // ignore invalid negative offset to keep compatibility
4854 2
      // with php < 5.5.35, < 5.6.21, < 7.0.6
0 ignored issues
show
Unused Code Comprehensibility introduced by
39% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
4855 1
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
4856 1
    }
4857
4858
    if ($offset > 0) {
4859
      $haystack = self::substr($haystack, $offset);
4860 2
    }
4861 2
4862 2 View Code Duplication
    if (($pos = strpos($haystack, $needle)) !== false) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4863 2
      $left = substr($haystack, 0, $pos);
4864
4865
      // negative offset not supported in PHP strpos(), ignoring
4866
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
4867
    }
4868
4869
    return false;
4870
  }
4871
4872
  /**
4873
   * Finds the last occurrence of a character in a string within another.
4874
   *
4875
   * @link http://php.net/manual/en/function.mb-strrchr.php
4876
   *
4877
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
4878
   * @param string $needle        <p>The string to find in haystack</p>
4879
   * @param bool   $before_needle [optional] <p>
4880
   *                              Determines which portion of haystack
4881
   *                              this function returns.
4882 11
   *                              If set to true, it returns all of haystack
4883
   *                              from the beginning to the last occurrence of needle.
4884 11
   *                              If set to false, it returns all of haystack
4885 11
   *                              from the last occurrence of needle to the end,
4886 11
   *                              </p>
4887
   * @param string $encoding      [optional] <p>
4888 11
   *                              Character encoding name to use.
4889 1
   *                              If it is omitted, internal character encoding is used.
4890 1
   *                              </p>
4891 1
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4892
   *
4893 11
   * @return string|false The portion of haystack or false if needle is not found.
4894
   */
4895 11 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4896
  {
4897 11
    if ($encoding !== 'UTF-8') {
4898 1
      $encoding = self::normalize_encoding($encoding);
4899 1
    }
4900
4901
    if ($cleanUtf8 === true) {
4902 11
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
4903 11
      // if invalid characters are found in $haystack before $needle
4904
      $needle = self::clean($needle);
4905 11
      $haystack = self::clean($haystack);
4906
    }
4907 11
4908
    return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
4909
  }
4910
4911
  /**
4912
   * Reverses characters order in the string.
4913
   *
4914
   * @param string $str The input string
4915
   *
4916
   * @return string The string with characters in the reverse sequence
4917
   */
4918
  public static function strrev($str)
4919
  {
4920
    $str = (string)$str;
4921 21
4922
    if (!isset($str[0])) {
4923
      return '';
4924 21
    }
4925
4926 21
    return implode('', array_reverse(self::split($str)));
4927 6
  }
4928
4929
  /**
4930 19
   * Finds the last occurrence of a character in a string within another, case insensitive.
4931
   *
4932
   * @link http://php.net/manual/en/function.mb-strrichr.php
4933
   *
4934
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
4935
   * @param string  $needle        <p>The string to find in haystack.</p>
4936 19
   * @param bool    $before_needle [optional] <p>
4937 2
   *                               Determines which portion of haystack
4938 2
   *                               this function returns.
4939
   *                               If set to true, it returns all of haystack
4940 19
   *                               from the beginning to the last occurrence of needle.
4941
   *                               If set to false, it returns all of haystack
4942
   *                               from the last occurrence of needle to the end,
4943
   *                               </p>
4944
   * @param string  $encoding      [optional] <p>
4945
   *                               Character encoding name to use.
4946
   *                               If it is omitted, internal character encoding is used.
4947
   *                               </p>
4948
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4949
   *
4950 3
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
4951
   */
4952 3 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4953
  {
4954
    if ($encoding !== 'UTF-8') {
4955
      $encoding = self::normalize_encoding($encoding);
4956
    }
4957
4958
    if ($cleanUtf8 === true) {
4959
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
4960
      // if invalid characters are found in $haystack before $needle
4961
      $needle = self::clean($needle);
4962
      $haystack = self::clean($haystack);
4963
    }
4964
4965
    return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
4966 16
  }
4967
4968 16
  /**
4969
   * Find position of last occurrence of a case-insensitive string.
4970 16
   *
4971 2
   * @param string  $haystack  <p>The string to look in.</p>
4972
   * @param string  $needle    <p>The string to look for.</p>
4973
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
4974 15
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4975
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4976
   *
4977
   * @return int|false <p>
4978
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
4979
   *                   not found, it returns false.
4980 15
   *                   </p>
4981 2
   */
4982 2
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
4983
  {
4984 15
    return self::strrpos(self::strtolower($haystack), self::strtolower($needle), $offset, $encoding, $cleanUtf8);
4985
  }
4986
4987
  /**
4988
   * Find position of last occurrence of a string in a string.
4989
   *
4990
   * @link http://php.net/manual/en/function.mb-strrpos.php
4991
   *
4992
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
4993
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
4994
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
4995
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
4996
   *                              the end of the string.
4997
   *                              </p>
4998
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4999
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5000
   *
5001 1
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5002
   *                   is not found, it returns false.</p>
5003 1
   */
5004 1
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
5005 1
  {
5006 1
    if (((int)$needle) === $needle && ($needle >= 0)) {
5007 1
      $needle = (string)self::chr($needle);
5008
    }
5009 1
5010 1
    $haystack = (string)$haystack;
5011 1
    $needle = (string)$needle;
5012 1
5013 1
    if (!isset($haystack[0], $needle[0])) {
5014
      return false;
5015 1
    }
5016 1
5017
    // init
5018 1
    $needle = (string)$needle;
5019
    $offset = (int)$offset;
5020
5021
    if (
5022
        $cleanUtf8 === true
5023
        ||
5024
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
5025
    ) {
5026
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
5027
5028
      $needle = self::clean($needle);
5029
      $haystack = self::clean($haystack);
5030 1
    }
5031
5032 1
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
5033 1
      self::checkForSupport();
5034 1
    }
5035
5036 1 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5037
        $encoding === 'UTF-8'
5038
        ||
5039
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
5040 1
    ) {
5041 1
      $encoding = 'UTF-8';
5042
    } else {
5043 1
      $encoding = self::normalize_encoding($encoding);
5044
    }
5045
5046 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5047
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
5048
        ||
5049
        self::$support['mbstring'] === true
5050
    ) {
5051
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
5052
    }
5053
5054
    if (self::$support['iconv'] === true) {
5055
      return \grapheme_strrpos($haystack, $needle, $offset);
5056
    }
5057
5058
    // fallback
5059 47
5060
    if ($offset > 0) {
5061
      $haystack = self::substr($haystack, $offset);
5062 47
    } elseif ($offset < 0) {
5063
      $haystack = self::substr($haystack, 0, $offset);
5064 47
    }
5065 9
5066 View Code Duplication
    if (($pos = strrpos($haystack, $needle)) !== false) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5067
      $left = substr($haystack, 0, $pos);
5068 45
5069
      // negative offset not supported in PHP strpos(), ignoring
5070
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
5071
    }
5072 1
5073 1
    return false;
5074
  }
5075 45
5076 45
  /**
5077 37
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5078 37
   * mask.
5079
   *
5080 45
   * @param string $str    <p>The input string.</p>
5081 2
   * @param string $mask   <p>The mask of chars</p>
5082
   * @param int    $offset [optional]
5083
   * @param int    $length [optional]
5084 43
   *
5085 20
   * @return int
5086 20
   */
5087 41
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
5088
  {
5089
    // init
5090 43
    $length = (int)$length;
5091
    $offset = (int)$offset;
5092
5093
    if ($offset || 2147483647 !== $length) {
5094
      $str = self::substr($str, $offset, $length);
5095
    }
5096 43
5097 2
    $str = (string)$str;
5098 43
    if (!isset($str[0], $mask[0])) {
5099 43
      return 0;
5100 43
    }
5101 1
5102
    return preg_match('/^' . self::rxClass($mask) . '+/u', $str, $str) ? self::strlen($str[0]) : 0;
5103
  }
5104 43
5105 43
  /**
5106
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5107
   *
5108
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5109
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5110
   * @param bool    $before_needle [optional] <p>
5111
   *                               If <b>TRUE</b>, strstr() returns the part of the
5112
   *                               haystack before the first occurrence of the needle (excluding the needle).
5113
   *                               </p>
5114
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5115
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5116
   *
5117
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5118
   */
5119
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
5120
  {
5121
    $haystack = (string)$haystack;
5122
    $needle = (string)$needle;
5123
5124
    if (!isset($haystack[0], $needle[0])) {
5125
      return false;
5126
    }
5127
5128
    if ($cleanUtf8 === true) {
5129
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5130
      // if invalid characters are found in $haystack before $needle
5131
      $needle = self::clean($needle);
5132
      $haystack = self::clean($haystack);
5133
    }
5134
5135 1
    if ($encoding !== 'UTF-8') {
5136
      $encoding = self::normalize_encoding($encoding);
5137 1
    }
5138 1
5139
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
5140 1
      self::checkForSupport();
5141
    }
5142
5143 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5144
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
5145
        ||
5146
        self::$support['mbstring'] === true
5147
    ) {
5148
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
5149
    }
5150
5151
    return \grapheme_strstr($haystack, $needle, $before_needle);
5152
  }
5153
5154
  /**
5155
   * Unicode transformation for case-less matching.
5156
   *
5157
   * @link http://unicode.org/reports/tr21/tr21-5.html
5158
   *
5159
   * @param string  $str       <p>The input string.</p>
5160
   * @param bool    $full      [optional] <p>
5161 1
   *                           <b>true</b>, replace full case folding chars (default)<br />
5162
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
5163 1
   *                           </p>
5164 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5165
   *
5166 1
   * @return string
5167 1
   */
5168
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
5169
  {
5170 1
    // init
5171 1
    $str = (string)$str;
5172 1
5173
    if (!isset($str[0])) {
5174 1
      return '';
5175 1
    }
5176
5177
    static $COMMON_CASE_FOLD_KEYS_CACHE = null;
5178 1
    static $COMMAN_CASE_FOLD_VALUES_CACHE = null;
5179 1
5180
    if ($COMMON_CASE_FOLD_KEYS_CACHE === null) {
5181 1
      $COMMON_CASE_FOLD_KEYS_CACHE = array_keys(self::$commonCaseFold);
5182 1
      $COMMAN_CASE_FOLD_VALUES_CACHE = array_values(self::$commonCaseFold);
5183 1
    }
5184
5185 1
    $str = str_replace($COMMON_CASE_FOLD_KEYS_CACHE, $COMMAN_CASE_FOLD_VALUES_CACHE, $str);
5186
5187
    if ($full) {
5188
5189
      static $fullCaseFold = null;
5190
5191
      if ($fullCaseFold === null) {
5192 1
        $fullCaseFold = self::getData('caseFolding_full');
5193
      }
5194
5195
      /** @noinspection OffsetOperationsInspection */
5196
      $str = str_replace($fullCaseFold[0], $fullCaseFold[1], $str);
5197
    }
5198
5199
    if ($cleanUtf8 === true) {
5200
      $str = self::clean($str);
5201
    }
5202
5203
    return self::strtolower($str);
5204
  }
5205
5206
  /**
5207 6
   * Make a string lowercase.
5208
   *
5209 6
   * @link http://php.net/manual/en/function.mb-strtolower.php
5210 1
   *
5211
   * @param string  $str       <p>The string being lowercased.</p>
5212
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
5213 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5214 1
   *
5215 1
   * @return string str with all alphabetic characters converted to lowercase.
5216 1
   */
5217 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5218
  {
5219
    // init
5220 1
    $str = (string)$str;
5221 1
5222 1
    if (!isset($str[0])) {
5223 1
      return '';
5224 1
    }
5225 1
5226 1
    if ($cleanUtf8 === true) {
5227 1
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5228
      // if invalid characters are found in $haystack before $needle
5229
      $str = self::clean($str);
5230
    }
5231 1
5232 1
    if ($encoding !== 'UTF-8') {
5233 1
      $encoding = self::normalize_encoding($encoding);
5234 1
    }
5235 1
5236 1
    return \mb_strtolower($str, $encoding);
5237 1
  }
5238 1
5239
  /**
5240
   * Generic case sensitive transformation for collation matching.
5241 1
   *
5242 1
   * @param string $str <p>The input string</p>
5243 1
   *
5244 1
   * @return string
5245
   */
5246
  private static function strtonatfold($str)
5247
  {
5248 1
    /** @noinspection PhpUndefinedClassInspection */
5249
    return preg_replace('/\p{Mn}+/u', '', \Normalizer::normalize($str, \Normalizer::NFD));
5250 6
  }
5251 1
5252 1
  /**
5253 1
   * Make a string uppercase.
5254 1
   *
5255
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5256 1
   *
5257
   * @param string  $str       <p>The string being uppercased.</p>
5258
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5259 6
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5260 6
   *
5261
   * @return string str with all alphabetic characters converted to uppercase.
5262 6
   */
5263 4 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5264 4
  {
5265
    $str = (string)$str;
5266 6
5267
    if (!isset($str[0])) {
5268 6
      return '';
5269
    }
5270
5271
    if ($cleanUtf8 === true) {
5272
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5273
      // if invalid characters are found in $haystack before $needle
5274
      $str = self::clean($str);
5275
    }
5276
5277
    if ($encoding !== 'UTF-8') {
5278
      $encoding = self::normalize_encoding($encoding);
5279
    }
5280 1
5281
    return \mb_strtoupper($str, $encoding);
5282 1
  }
5283
5284 1
  /**
5285 1
   * Translate characters or replace sub-strings.
5286
   *
5287
   * @link  http://php.net/manual/en/function.strtr.php
5288 1
   *
5289 1
   * @param string          $str  <p>The string being translated.</p>
5290 1
   * @param string|string[] $from <p>The string replacing from.</p>
5291
   * @param string|string[] $to   <p>The string being translated to.</p>
5292 1
   *
5293
   * @return string <p>
5294
   *                This function returns a copy of str, translating all occurrences of each character in from to the
5295 1
   *                corresponding character in to.
5296 1
   *                </p>
5297
   */
5298 1
  public static function strtr($str, $from, $to = INF)
5299 1
  {
5300
    if (INF !== $to) {
5301 1
      $from = self::str_split($from);
0 ignored issues
show
Bug introduced by
It seems like $from defined by self::str_split($from) on line 5301 can also be of type array<integer,string>; however, voku\helper\UTF8::str_split() does only seem to accept string, maybe add an additional type check?

If a method or function can return multiple different values and unless you are sure that you only can receive a single value in this context, we recommend to add an additional type check:

/**
 * @return array|string
 */
function returnsDifferentValues($x) {
    if ($x) {
        return 'foo';
    }

    return array();
}

$x = returnsDifferentValues($y);
if (is_array($x)) {
    // $x is an array.
}

If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.

Loading history...
5302
      $to = self::str_split($to);
0 ignored issues
show
Bug introduced by
It seems like $to defined by self::str_split($to) on line 5302 can also be of type array<integer,string>; however, voku\helper\UTF8::str_split() does only seem to accept string, maybe add an additional type check?

If a method or function can return multiple different values, and unless you are sure that you can only receive a single value in this context, we recommend adding an additional type check:

/**
 * @return array|string
 */
function returnsDifferentValues($x) {
    if ($x) {
        return 'foo';
    }

    return array();
}

$x = returnsDifferentValues($y);
if (is_array($x)) {
    // $x is an array.
}

If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.

Loading history...
5303 1
      $countFrom = count($from);
5304 1
      $countTo = count($to);
5305
5306 1
      if ($countFrom > $countTo) {
5307
        $from = array_slice($from, 0, $countTo);
5308 1
      } elseif ($countFrom < $countTo) {
5309
        $to = array_slice($to, 0, $countFrom);
5310 1
      }
5311
5312 1
      $from = array_combine($from, $to);
5313
    }
5314
5315
    return strtr($str, $from);
0 ignored issues
show
Bug introduced by
It seems like $from defined by parameter $from on line 5298 can also be of type string; however, strtr() does only seem to accept array, maybe add an additional type check?

This check looks at variables that have been passed in as parameters and are passed out again to other methods.

If the outgoing method call has stricter type requirements than the method itself, an issue is raised.

An additional type check may prevent trouble.

Loading history...
5316
  }
5317
5318
  /**
5319
   * Return the width of a string.
5320
   *
5321
   * @param string  $str       <p>The input string.</p>
5322
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5323
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5324
   *
5325
   * @return int
5326 7
   */
5327
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
5328 7
  {
5329
    if ($encoding !== 'UTF-8') {
5330
      $encoding = self::normalize_encoding($encoding);
5331
    }
5332
5333
    if ($cleanUtf8 === true) {
5334
      // iconv and mbstring are not tolerant to invalid encoding
5335
      // further, their behaviour is inconsistent with that of PHP's substr
5336
5337
      $str = self::clean($str);
5338
    }
5339
5340 1
    return \mb_strwidth($str, $encoding);
5341
  }
5342 1
5343
  /**
5344
   * Get part of a string.
5345
   *
5346
   * @link http://php.net/manual/en/function.mb-substr.php
5347
   *
5348
   * @param string  $str       <p>The string being checked.</p>
5349
   * @param int     $start     <p>The first position used in str.</p>
5350
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
5351
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5352
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5353
   *
5354 1
   * @return string <p>Returns a sub-string specified by the start and length parameters.</p>
5355
   */
5356 1
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
5357
  {
5358
    // init
5359
    $str = (string)$str;
5360
5361
    if (!isset($str[0])) {
5362
      return '';
5363
    }
5364
5365
    if ($cleanUtf8 === true) {
5366
      // iconv and mbstring are not tolerant to invalid encoding
5367
      // further, their behaviour is inconsistent with that of PHP's substr
5368 1
5369
      $str = self::clean($str);
5370 1
    }
5371
5372
    $str_length = 0;
5373
    if ($start || $length === null) {
5374
      $str_length = (int)self::strlen($str);
5375
    }
5376
5377
    if ($start && $start > $str_length) {
5378
      return false;
5379
    }
5380
5381
    if ($length === null) {
5382
      $length = $str_length;
5383
    } else {
5384
      $length = (int)$length;
5385 13
    }
5386
5387 13
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
5388
      self::checkForSupport();
5389
    }
5390 13
5391 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5392 13
        $encoding === 'UTF-8'
5393 3
        ||
5394
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
5395
    ) {
5396 11
      $encoding = 'UTF-8';
5397
    } else {
5398
      $encoding = self::normalize_encoding($encoding);
5399 11
    }
5400 7
5401
    if (self::$support['mbstring'] === true) {
5402
      return \mb_substr($str, $start, $length, $encoding);
5403 5
    }
5404 1
5405
    if (self::$support['iconv'] === true) {
5406
      return \iconv_substr($str, $start, $length, $encoding);
5407
    }
5408 1
5409 1
    // fallback
5410
5411
    // split to array, and remove invalid characters
5412 1
    $array = self::split($str);
5413 1
5414
    // extract relevant part, and join to make a string again
5415
    return implode('', array_slice($array, $start, $length));
5416 1
  }
5417
5418
  /**
5419 1
   * Binary safe comparison of two strings from an offset, up to length characters.
5420
   *
5421 5
   * @param string  $main_str           <p>The main string being compared.</p>
5422 5
   * @param string  $str                <p>The secondary string being compared.</p>
5423 5
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5424
   *                                    the end of the string.</p>
5425 5
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5426
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5427 5
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5428 5
   *                                    insensitive.</p>
5429
   *
5430
   * @return int
5431 5
   */
5432
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5433
  {
5434 5
    $main_str = self::substr($main_str, $offset, $length);
5435 5
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5434 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5436 5
5437
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5434 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5435 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5434 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5435 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5438 5
  }
5439 2
5440
  /**
5441 2
   * Count the number of substring occurrences.
5442 2
   *
5443 2
   * @link  http://php.net/manual/en/function.substr-count.php
5444
   *
5445 2
   * @param string  $haystack  <p>The string to search in.</p>
5446 1
   * @param string  $needle    <p>The substring to search for.</p>
5447
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
5448 1
   * @param int     $length    [optional] <p>
5449 1
   *                           The maximum length after the specified offset to search for the
5450 1
   *                           substring. It outputs a warning if the offset plus the length is
5451
   *                           greater than the haystack length.
5452 1
   *                           </p>
5453
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
5454
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5455
   *
5456
   * @return int|false <p>This function returns an integer or false if there isn't a string.</p>
5457
   */
5458
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
5459
  {
5460
    // init
5461
    $haystack = (string)$haystack;
5462
    $needle = (string)$needle;
5463
5464
    if (!isset($haystack[0], $needle[0])) {
5465
      return false;
5466
    }
5467 1
5468 2
    if ($offset || $length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to true; this is ambiguous if the integer can be zero. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
5469
      $offset = (int)$offset;
5470 5
      $length = (int)$length;
5471
5472
      if (
5473
          $length + $offset <= 0
5474
          &&
5475 5
          Bootup::is_php('7.1') === false
5476
      ) {
5477
        return false;
5478
      }
5479
5480 5
      $haystack = self::substr($haystack, $offset, $length, $encoding);
5481 5
    }
5482 1
5483 1
    if ($encoding !== 'UTF-8') {
5484
      $encoding = self::normalize_encoding($encoding);
5485 1
    }
5486 1
5487 1
    if ($cleanUtf8 === true) {
5488
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5489 1
      // if invalid characters are found in $haystack before $needle
5490
      $needle = self::clean($needle);
5491 5
      $haystack = self::clean($haystack);
0 ignored issues
show
Security Bug introduced by
It seems like $haystack can also be of type false; however, voku\helper\UTF8::clean() does only seem to accept string, did you maybe forget to handle an error condition?
Loading history...
5492 5
    }
5493 5
5494 5 View Code Duplication
    if (
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5495 1
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
5496
        ||
5497 5
        self::$support['mbstring'] === true
5498
    ) {
5499 5
      return \mb_substr_count($haystack, $needle, $encoding);
5500
    }
5501
5502
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);
5503
    return count($matches);
5504
  }
5505
5506
  /**
5507
   * Removes a prefix ($needle) from the start of the string ($haystack), case-insensitive.
5508
   *
5509 2
   * @param string $haystack <p>The string to search in.</p>
5510
   * @param string $needle   <p>The substring to search for.</p>
5511 2
   *
5512
   * @return string <p>Return the sub-string.</p>
5513 1
   */
5514 View Code Duplication
  public static function substr_ileft($haystack, $needle)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5515
  {
5516 1
    $haystack = (string)$haystack;
5517 1
    $needle = (string)$needle;
5518
5519 1
    if (!isset($haystack[0])) {
5520
      return '';
5521
    }
5522 2
5523
    if (!isset($needle[0])) {
5524 2
      return $haystack;
5525 1
    }
5526
5527
    if (self::str_istarts_with($haystack, $needle) === true) {
5528 2
      $haystack = self::substr($haystack, self::strlen($needle));
5529
    }
5530
5531
    return $haystack;
5532
  }
5533
5534
  /**
5535
   * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
5536
   *
5537
   * @param string $haystack <p>The string to search in.</p>
5538
   * @param string $needle   <p>The substring to search for.</p>
5539
   *
5540 1
   * @return string <p>Return the sub-string.</p>
5541
   */
5542 1 View Code Duplication
  public static function substr_iright($haystack, $needle)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5543
  {
5544
    $haystack = (string)$haystack;
5545
    $needle = (string)$needle;
5546
5547
    if (!isset($haystack[0])) {
5548
      return '';
5549
    }
5550
5551
    if (!isset($needle[0])) {
5552
      return $haystack;
5553
    }
5554
5555
    if (self::str_iends_with($haystack, $needle) === true) {
5556
      $haystack = self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
5557
    }
5558
5559
    return $haystack;
5560
  }
5561
5562
  /**
5563
   * Removes a prefix ($needle) from the start of the string ($haystack).
5564
   *
5565
   * @param string $haystack <p>The string to search in.</p>
5566
   * @param string $needle   <p>The substring to search for.</p>
5567
   *
5568 20
   * @return string <p>Return the sub-string.</p>
5569
   */
5570 20 View Code Duplication
  public static function substr_left($haystack, $needle)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5571 2
  {
5572
    $haystack = (string)$haystack;
5573
    $needle = (string)$needle;
5574 2
5575 2
    if (!isset($haystack[0])) {
5576
      return '';
5577 2
    }
5578
5579
    if (!isset($needle[0])) {
5580 20
      return $haystack;
5581
    }
5582 20
5583 4
    if (self::str_starts_with($haystack, $needle) === true) {
5584
      $haystack = self::substr($haystack, self::strlen($needle));
5585
    }
5586 19
5587 19
    return $haystack;
5588
  }
5589
5590 19
  /**
5591 19
   * Replace text within a portion of a string.
5592
   *
5593 19
   * source: https://gist.github.com/stemar/8287074
5594 19
   *
5595 19
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
5596 19
   * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
5597
   * @param int|int[]       $start
5598 19
   * @param int|int[]|void  $length      [optional]
5599
   *
5600 16
   * @return string|string[]
5601 16
   */
5602 16
  public static function substr_replace($str, $replacement, $start, $length = null)
5603 16
  {
5604 5
    if (is_array($str)) {
5605 5
      $num = count($str);
5606 5
5607
      // $replacement
5608
      if (is_array($replacement)) {
5609 19
        $replacement = array_slice($replacement, 0, $num);
5610
      } else {
5611 17
        $replacement = array_pad(array($replacement), $num, $replacement);
5612 13
      }
5613 13
5614 13
      // $start
5615 8 View Code Duplication
      if (is_array($start)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5616 8
        $start = array_slice($start, 0, $num);
5617 8
        foreach ($start as &$valueTmp) {
5618
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
5619
        }
5620 19
        unset($valueTmp);
5621
      } else {
5622 9
        $start = array_pad(array($start), $num, $start);
5623 4
      }
5624 4
5625 4
      // $length
5626 6
      if (!isset($length)) {
5627 6
        $length = array_fill(0, $num, 0);
5628 6 View Code Duplication
      } elseif (is_array($length)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5629
        $length = array_slice($length, 0, $num);
5630
        foreach ($length as &$valueTmpV2) {
5631 9
          if (isset($valueTmpV2)) {
5632 6
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
5633 6
          } else {
5634 6
            $valueTmpV2 = 0;
5635
          }
5636
        }
5637 19
        unset($valueTmpV2);
5638
      } else {
5639 4
        $length = array_pad(array($length), $num, $length);
5640 4
      }
5641 2
5642 2
      // Recursive call
5643 3
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
5644 3
    } else {
5645 3
      if (is_array($replacement)) {
5646
        if (count($replacement) > 0) {
5647
          $replacement = $replacement[0];
5648 4
        } else {
5649 16
          $replacement = '';
5650
        }
5651 19
      }
5652
    }
5653
5654 19
    preg_match_all('/./us', (string)$str, $smatches);
5655 19
    preg_match_all('/./us', (string)$replacement, $rmatches);
5656
5657 3
    if ($length === null) {
5658 19
      $length = (int)\mb_strlen($str);
5659
    }
5660 19
5661
    array_splice($smatches[0], $start, $length, $rmatches[0]);
5662
5663 19
    return implode('', $smatches[0]);
5664 19
  }
5665 19
5666 2
  /**
5667 19
   * Removes a suffix ($needle) from the end of the string ($haystack).
5668
   *
5669 19
   * @param string $haystack <p>The string to search in.</p>
5670
   * @param string $needle   <p>The substring to search for.</p>
5671 19
   *
5672
   * @return string <p>Return the sub-string.</p>
5673
   */
5674 View Code Duplication
  public static function substr_right($haystack, $needle)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5675
  {
5676
    $haystack = (string)$haystack;
5677
    $needle = (string)$needle;
5678
5679
    if (!isset($haystack[0])) {
5680
      return '';
5681
    }
5682
5683
    if (!isset($needle[0])) {
5684
      return $haystack;
5685
    }
5686
5687 26
    if (self::str_ends_with($haystack, $needle) === true) {
5688
      $haystack = self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
5689 26
    }
5690
5691 26
    return $haystack;
5692 5
  }
5693
5694
  /**
5695
   * Returns a case swapped version of the string.
5696 22
   *
5697 6
   * @param string  $str       <p>The input string.</p>
5698
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
5699
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5700 16
   *
5701
   * @return string <p>Each character's case swapped.</p>
5702
   */
5703
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
5704
  {
5705
    $str = (string)$str;
5706
5707
    if (!isset($str[0])) {
5708
      return '';
5709
    }
5710
5711
    if ($encoding !== 'UTF-8') {
5712 14
      $encoding = self::normalize_encoding($encoding);
5713
    }
5714 14
5715
    if ($cleanUtf8 === true) {
5716
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
5717
      // if invalid characters are found in $haystack before $needle
5718
      $str = self::clean($str);
5719
    }
5720
5721
    $strSwappedCase = preg_replace_callback(
5722
        '/[\S]/u',
5723
        function ($match) use ($encoding) {
5724
          $marchToUpper = UTF8::strtoupper($match[0], $encoding);
5725
5726
          if ($match[0] === $marchToUpper) {
5727
            return UTF8::strtolower($match[0], $encoding);
5728 1
          } else {
5729
            return $marchToUpper;
5730 1
          }
5731
        },
5732
        $str
5733
    );
5734
5735
    return $strSwappedCase;
5736
  }
5737
5738
  /**
5739
   * alias for "UTF8::to_ascii()"
5740
   *
5741
   * @see UTF8::to_ascii()
5742
   *
5743
   * @param string $s
5744 8
   * @param string $subst_chr
5745
   * @param bool   $strict
5746 8
   *
5747 2
   * @return string
5748
   */
5749
  public static function toAscii($s, $subst_chr = '?', $strict = false)
5750 7
  {
5751 7
    return self::to_ascii($s, $subst_chr, $strict);
5752 7
  }
5753
5754 7
  /**
5755 1
   * alias for "UTF8::to_iso8859()"
5756 1
   *
5757 7
   * @see UTF8::to_iso8859()
5758
   *
5759
   * @param string $str
5760 7
   *
5761
   * @return string|string[]
5762 7
   */
5763 7
  public static function toIso8859($str)
5764
  {
5765
    return self::to_iso8859($str);
5766
  }
5767 7
5768
  /**
5769
   * alias for "UTF8::to_latin1()"
5770
   *
5771 1
   * @see UTF8::to_latin1()
5772 1
   *
5773 1
   * @param $str
5774 7
   *
5775 7
   * @return string
5776 7
   */
5777
  public static function toLatin1($str)
5778 7
  {
5779 7
    return self::to_latin1($str);
5780
  }
5781 7
5782
  /**
5783
   * alias for "UTF8::to_utf8()"
5784
   *
5785
   * @see UTF8::to_utf8()
5786
   *
5787
   * @param string $str
5788
   *
5789
   * @return string
5790
   */
5791
  public static function toUTF8($str)
5792
  {
5793
    return self::to_utf8($str);
5794
  }
5795
5796
  /**
5797
   * Convert a string into ASCII.
5798
   *
5799
   * @param string $str     <p>The input string.</p>
5800
   * @param string $unknown [optional] <p>Character to use if a character is unknown. (default is ?)</p>
5801 1
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
5802
   *                        performance</p>
5803 1
   *
5804
   * @return string
5805 1
   *
5806 1
   * @throws \Exception
5807
   */
5808
  public static function to_ascii($str, $unknown = '?', $strict = false)
5809 1
  {
5810
    static $UTF8_TO_ASCII;
5811 1
5812
    // init
5813 1
    $str = (string)$str;
5814 1
5815 1
    if (!isset($str[0])) {
5816 1
      return '';
5817
    }
5818 1
5819 1
    $str = self::clean($str, false, true, true);
5820 1
5821
    // check if we only have ASCII
5822 1
    if (self::is_ascii($str) === true) {
5823
      return $str;
5824
    }
5825
5826
    if ($strict === true) {
5827
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
5828
        self::checkForSupport();
5829
      }
5830 1
5831
      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
5832
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);
5833
5834
        // check again, if we only have ASCII, now ...
5835
        if (self::is_ascii($str) === true) {
5836
          return $str;
5837
        }
5838
5839
      } else {
5840
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
5841
      }
5842
    }
5843
5844
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
5845
    $chars = $ar[0];
5846
    foreach ($chars as &$c) {
5847
5848
      $ordC0 = ord($c[0]);
5849
5850
      if ($ordC0 >= 0 && $ordC0 <= 127) {
5851
        continue;
5852
      }
5853
5854
      $ordC1 = ord($c[1]);
5855
5856
      // ASCII - next please
5857
      if ($ordC0 >= 192 && $ordC0 <= 223) {
5858
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
5859
      }
5860
5861
      if ($ordC0 >= 224) {
5862
        $ordC2 = ord($c[2]);
5863
5864
        if ($ordC0 <= 239) {
5865
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
5866
        }
5867
5868
        if ($ordC0 >= 240) {
5869
          $ordC3 = ord($c[3]);
5870
5871
          if ($ordC0 <= 247) {
5872
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
5873
          }
5874
5875
          if ($ordC0 >= 248) {
5876
            $ordC4 = ord($c[4]);
5877
5878 View Code Duplication
            if ($ordC0 <= 251) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5879
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
5880
            }
5881
5882
            if ($ordC0 >= 252) {
5883
              $ordC5 = ord($c[5]);
5884
5885 View Code Duplication
              if ($ordC0 <= 253) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5886
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
5887
              }
5888
            }
5889
          }
5890
        }
5891
      }
5892
5893
      if ($ordC0 >= 254 && $ordC0 <= 255) {
5894
        $c = $unknown;
5895
        continue;
5896
      }
5897
5898
      if (!isset($ord)) {
5899
        $c = $unknown;
5900
        continue;
5901
      }
5902
5903
      $bank = $ord >> 8;
5904
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
5905
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
5906
        if (file_exists($bankfile)) {
5907
          /** @noinspection PhpIncludeInspection */
5908
          require $bankfile;
5909
        } else {
5910
          $UTF8_TO_ASCII[$bank] = array();
5911
        }
5912
      }
5913
5914
      $newchar = $ord & 255;
5915
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
5916
        $c = $UTF8_TO_ASCII[$bank][$newchar];
5917
      } else {
5918
        $c = $unknown;
5919
      }
5920
    }
5921
5922
    return implode('', $chars);
5923
  }
5924
5925
  /**
   * Convert a string (or every element of an array) into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are walked recursively, so nested arrays are converted as well and
   * the keys are kept. Scalars are cast to string and handed to
   * "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>The converted value; an empty string for empty input.</p>
   */
  public static function to_iso8859($str)
  {
    if (!is_array($str)) {
      $input = (string)$str;

      // an empty string has no offset 0, nothing to decode
      return isset($input[0]) ? self::utf8_decode($input) : '';
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
5954
5955
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns for the input.</p>
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
5968
5969
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes UTF-8 codepoints (e.g. "&#252;") and unicode escape sequences (e.g. "\u00fc").
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str <p>Any string or array (arrays are converted recursively).</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // plain 7-bit ASCII: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xC0) {
        // lead byte candidate: how many continuation bytes would a UTF-8 sequence need?
        if ($ordC1 <= 0xDF) {
          $tail = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xEF) {
          $tail = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xF7) {
          $tail = 3; // looks like 4 bytes UTF8
        } else {
          $tail = 0; // 0xF8-0xFF: doesn't look like UTF8, but should be converted
        }

        // every expected continuation byte must exist and be in 0x80-0xBF
        $looksLikeUtf8 = ($tail > 0);
        for ($j = 1; $j <= $tail; $j++) {
          $pos = $i + $j;
          if ($pos >= $max || (ord($str[$pos]) & 0xC0) !== 0x80) {
            $looksLikeUtf8 = false;
            break;
          }
        }

        if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $tail + 1);
          $i += $tail;
          continue;
        }

      } elseif (isset(self::$win1252ToUtf8[$ordC1])) { // 0x80-0xBF found in Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // Not valid UTF8: treat the byte as ISO-8859-1 and emit its two-byte
      // UTF-8 form. Integer shifts are used so that chr() never receives a float.
      $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,6};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6098 6
6099
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the native function if the string / chars were pure 7-bit,
   * but the check for ASCII (7-bit) costs more time than we would save here.
   *
   * When $chars is left at its default (INF) or is falsy, leading and trailing
   * characters of the Unicode categories "Z" (separators) and "C" (other / control)
   * are removed. Otherwise the work is delegated to "UTF8::ltrim()" and "UTF8::rtrim()".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $subject = (string)$str;

    if ($subject === '') {
      return '';
    }

    if ($chars !== INF && $chars) {
      $leftTrimmed = self::ltrim($subject, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $subject);
  }
6127
6128
  /**
   * Makes string's first char uppercase.
   *
   * The first character is cut off with "UTF8::substr()", upper-cased with
   * "UTF8::strtoupper()" and glued back onto the rest of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    return $firstCharUpper . $remainder;
  }
6141
6142
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6157
6158 1
  /**
   * Uppercase for all words in the string.
   *
   * The string is split into words and the text between them; every word that
   * is not listed in $exceptions gets its first character upper-cased via
   * "UTF8::ucfirst()", everything else is copied through unchanged.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly, case-sensitive).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // compare against '' explicitly: the string "0" is falsy but is real content
    if ($str === '') {
      return '';
    }

    $charlist = self::rxClass($charlist, '\pL');
    $words = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() yields false on a PCRE error (e.g. malformed UTF-8 with /u);
    // return the same empty result as before, without the foreach warning
    if ($words === false) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach ($words as $word) {

      // skip only truly empty pieces; a "0" between two words must be kept
      if ($word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6208
6209
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Uses PHP's "urldecode()", so a "+" is decoded to a space.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the result stops changing).</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $decoded = (string)$str;

    if ($decoded === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $decoded)) {
      $decoded = preg_replace($unicodeEscape, '&#x\\1;', urldecode($decoded));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    do {
      $previous = $decoded;

      $asUtf8 = self::to_utf8($decoded);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $decoded = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $decoded);

    return (string)$decoded;
  }
6259
6260 1
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Uses PHP's "rawurldecode()", so a "+" is left untouched.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the result stops changing).</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $decoded = (string)$str;

    if ($decoded === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $decoded)) {
      $decoded = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($decoded));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    do {
      $previous = $decoded;

      $asUtf8 = self::to_utf8($decoded);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $decoded = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $decoded);

    return (string)$decoded;
  }
6310
6311
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * Keys are the percent-encoded bytes "%20" .. "%FF", values are the
   * matching characters as UTF-8 strings.
   *
   * NOTE(review): the entries shown as '' (%7F, %81, %8D, %8F, %90, %9D, %A0, %AD)
   * are reproduced as they appear in the reviewed copy; please verify that they
   * were not invisible characters (e.g. NBSP for %A0, soft hyphen for %AD).
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€', // Windows-1252 0x80 is the EURO SIGN (was mapped to a backtick)
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6549
6550
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalised with "UTF8::to_utf8()", then the sequences
   * listed in self::$utf8ToWin1252 are replaced, and the result is passed to
   * the polyfill's "Xml::utf8_decode()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string; an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // the lookup table is split into search / replace lists only once per request
    static $searchCache = null;
    static $replaceCache = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $input = (string)self::to_utf8($input);

    if ($searchCache === null) {
      $searchCache = array_keys(self::$utf8ToWin1252);
      $replaceCache = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($searchCache, $replaceCache, $input);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
6579
6580
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After PHP's native "\utf8_encode()" has run, the sequences listed in
   * self::$cp1252ToUtf8 are replaced - but only if the encoded string
   * contains a "\xC2" byte at all.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string; an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // the lookup table is split into search / replace lists only once per request
    static $searchCache = null;
    static $replaceCache = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $encoded = \utf8_encode($input);

    // without a "\xC2" byte there is nothing the replacement table is applied to
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchCache === null) {
      $searchCache = array_keys(self::$cp1252ToUtf8);
      $replaceCache = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
6613
6614
  /**
   * Fix utf8-win1252 chars; simply forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the input.</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6627
6628
  /**
   * Returns the class-level table of utf8 whitespace characters (self::$whitespaceTable).
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6644
6645
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string holds more
   * than $words of them, it is cut after the last allowed word, right-trimmed
   * and $strAddOn is appended; otherwise it is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer (values below 1 yield an empty string).</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $text, $matches);

    // only shorten if the match exists and does not already cover the whole string
    if (isset($matches[0]) && self::strlen($text) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $text;
  }
6680
6681
  /**
   * Wraps a string to a given number of characters
   *
   * The string is turned into a one-byte-per-character mask ("?" for any
   * character, " " for a space, "#" for an existing break), the mask is wrapped
   * with PHP's native "wordwrap()", and the break positions found in the mask
   * are then replayed on the real (multi-byte) characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // build the character list and the matching single-byte mask
    $mask = '';
    $glyphs = array();
    foreach (explode($break, $str) as $segmentIndex => $segment) {

      if ($segmentIndex > 0) {
        // an already existing break counts as one position in the mask
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= ($glyph === ' ') ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $cursor = 0;    // index of the next unconsumed entry in $glyphs
    $maskPos = -1;  // position in the mask that has been replayed so far
    $breakPos = -1; // position of the current "#" in the mask

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {

      // copy everything in front of the break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $glyphs[$cursor];
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      // the character sitting at the break position (old break or space) is swallowed
      if ($break === $glyphs[$cursor] || ' ' === $glyphs[$cursor]) {
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      $wrapped .= $break;
    }

    // whatever was not consumed is the last line
    return $wrapped . implode('', $glyphs);
  }
6748
6749
  /**
   * Returns the class-level list of Unicode White Space characters (self::$whitespace).
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6758
6759
}
6760