Completed
Push — master ( 316bdb...f74be9 )
by Lars
03:45
created

UTF8::strtoupper()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 14
Code Lines 7

Duplication

Lines 14
Ratio 100 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 6
Bugs 1 Features 1
Metric Value
c 6
b 1
f 1
dl 14
loc 14
ccs 0
cts 0
cp 0
rs 9.4285
cc 3
eloc 7
nc 3
nop 2
crap 12
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
      // Maps each recognised byte-order-mark byte sequence to its length in bytes.
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // The three UTF-8 BOM bytes read as WINDOWS-1252 characters and re-encoded as
      // UTF-8 (2 bytes each => 6 bytes). Written as byte escapes so the key cannot
      // be silently emptied by an editor or a transcoding step.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  private static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  private static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  private static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  private static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  private static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  private static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  private static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  private static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792
      'ISO-IR-230',
793
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803
804
  /**
805
   * __construct()
806
   */
807 1
  public function __construct()
  {
    // Creating an instance only triggers the one-time environment detection;
    // the class is final, so static:: and self:: resolve to the same method.
    static::checkForSupport();
  }
811
812
  /**
813
   * Return the character at the specified position: $str[1] like functionality.
814
   *
815
   * @param    string $str A UTF-8 string.
816
   * @param    int    $pos The position of character to return.
817
   *
818
   * @return   string Single Multi-Byte character.
819
   */
820 2
  public static function access($str, $pos)
  {
    // Take a slice of length one beginning at $pos, delegating the
    // multi-byte handling to the class's own substr().
    $character = self::substr($str, $pos, 1);

    return $character;
  }
824
825
  /**
826
   * Prepends UTF-8 BOM character to the string and returns the whole string.
827
   *
828
   * INFO: If BOM already existed there, the Input string is returned.
829
   *
830
   * @param    string $str The input string
831
   *
832
   * @return   string The output string that contains BOM
833
   */
834
  public static function add_bom_to_string($str)
  {
    // Anything other than a strict "false" from the BOM check leaves the input untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    // No BOM detected: put the UTF-8 BOM in front of the input.
    return self::bom() . $str;
  }
842
843
  /**
844
   * Convert binary into a string.
845
   *
846
   * @param mixed $bin 1|0
847
   *
848
   * @return string
849
   */
850 1
  public static function binary_to_str($bin)
  {
    // Step 1: rewrite the base-2 digit string as base-16 digits.
    // NOTE(review): the PHP manual warns that base_convert() can lose precision
    // on large numbers, so long bit strings may not round-trip -- confirm callers.
    $hexDigits = base_convert($bin, 2, 16);

    // Step 2: 'H*' turns the hex digits (high nibble first) into raw bytes.
    return pack('H*', $hexDigits);
  }
854
855
  /**
856
   * Returns the UTF-8 Byte Order Mark Character.
857
   *
858
   * @return string UTF-8 Byte Order Mark
859
   */
860 1
  public static function bom()
  {
    // The three bytes EF BB BF form the UTF-8 byte order mark.
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
864
865
  /**
866
   * @alias of UTF8::chr_map()
867
   * @see   UTF8::chr_map()
868
   *
869
   * @param string|array $callback
870
   * @param string       $str
871
   *
872
   * @return array
873
   */
874 1
  public static function callback($callback, $str)
  {
    // Pure alias: hand both arguments to chr_map() and pass its result through unchanged.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
878
879
  /**
880
   * This method will auto-detect your server environment for UTF-8 support.
881
   *
882
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
883
   */
884
  public static function checkForSupport()
  {
    // The detection result is cached in self::$support; do nothing on repeat calls.
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // Set the marker before probing so the probes themselves see the check as done.
    self::$support['already_checked_via_portable_utf8'] = true;

    // Record, one by one, which optional capabilities were detected.
    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
897
898
  /**
899
   * Generates a UTF-8 encoded character from the given code point.
900
   *
901
   * INFO: opposite to UTF8::ord()
902
   *
903
   * @param    int $code_point The code point for which to generate a character.
904
   *
905
   * @return   string|null Multi-Byte character, returns null on failure to encode.
906
   */
907
  public static function chr($code_point)
  {
    // Integer view of the input; used below to tell real integers from other notations.
    $i = (int)$code_point;

    // Make sure the capability cache is populated before it is read.
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Delegate to the intl extension when it was detected.
    if (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // The input was not already a plain integer (e.g. a string notation):
    // let hex_to_int() resolve it. It is expected to return an int, or false on failure.
    if ($i !== $code_point) {
      $i = self::hex_to_int($code_point);
    }

    // Only a non-integer result signals failure. The former loose "!$i" check
    // also rejected the perfectly valid code point 0.
    if (!is_int($i)) {
      return null;
    }

    // U+0000 is encoded as a single NUL byte; return it directly instead of
    // relying on numeric-entity decoding of "&#0;".
    if ($i === 0) {
      return "\x00";
    }

    // Fallback encoder: decode the numeric character reference for this code point.
    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
930
931
  /**
   * Runs a callback over every chunk that self::split() produces for a string.
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The callback results, one entry per chunk.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
945
946
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character,
   *                 an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Check for emptiness explicitly: a loose "!$str" would also discard the
    // valid one-character string "0".
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
966
967
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read: its payload bits are kept
   * and six more bits are appended for every following byte.
   *
   * @param   string $char The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx: the first byte already is the result
      return $code;
    }

    // Determine the sequence length from the lead byte and strip its marker bits.
    // A lead byte that matches none of the patterns is left untouched.
    $length = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $code &= ~0xf0;
    }

    // 10xxxxxx: append the bits of every following byte
    for ($offset = 1; $offset < $length; $offset++) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
1006
1007
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $char The input character
   * @param    string $pfix Prefix handed to self::int_to_hex() together with the code point.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1019
1020
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * INFO: the chunks are glued together with $end, so $end is placed between
   *       the chunks and not after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1033
1034
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * The optional steps run in a fixed order: whitespace normalization,
   * MS Word normalization, BOM removal.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip the BOM via self::removeBOM()
   * @param bool   $normalize_whitespace    set true, to run self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences, groups 2 and 3 capture
    // stray bytes. Only group 1 is written back, which drops the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($utf8Pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    return ($remove_bom === true) ? self::removeBOM($str) : $str;
  }
1080
1081
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * @param string $str
   *
   * @return string The cleaned string, '' for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // clean() is called with:
    // - BOM removal switched on
    // - whitespace normalization switched on (non-breaking spaces are kept)
    // - MS Word normalization switched off
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
1108
1109
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is first broken up via self::split(); an array is used as given.
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, every code point is additionally run through
   *                                    self::int_to_hex() (U+xxxx format),
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $chars
    );

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $codePoints
    );
  }
1146
1147
  /**
   * Returns count of characters used in a string.
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Clean non UTF-8 chars from the string
   *                             (passed on to self::split()).
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split with a chunk length of one
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1160
1161
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity, which mbstring then
   * converts to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1176
1177
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function tries to fix broken / double encoding too,
   *        so you can call this function also on a UTF-8 string and you don't mess up the string.
   *
   * The input is returned unchanged when the string or the encoding is empty,
   * when no encoding was detected, when $force is off and the detected
   * encoding already equals the target, or when the conversion result is falsy.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encodingDetected = self::str_detect_encoding($str);

    // nothing detected, or nothing to do because the string already has the target encoding
    if (!$encodingDetected) {
      return $str;
    }
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // target UTF-8: use the own converter
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: handled by the WINDOWS-1252 converter
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    // everything else is left to mbstring
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    return $strEncoded ? $strEncoded : $str;
  }
1253
1254
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The name is run through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                     </p>
   * @param int|null      $flags         [optional] <p>
   *                                     Handed to the native function as its "use_include_path"
   *                                     argument: every non-empty value (e.g. FILE_USE_INCLUDE_PATH)
   *                                     triggers the include path search, null / 0 does not.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If no context is given and
   *                                     $timeout is not 0, a context with an "http" timeout
   *                                     is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts, null means 0.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached. Only integer values are passed on.
   *                                     </p>
   * @param int           $timeout       Timeout of the "http" stream context that is created
   *                                     when no context was given, 0 creates no context.
   *
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
   *                                     non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING also rewrites quotes and tag-like parts
    // of a file name and is deprecated in newer PHP versions - confirm it is still wanted.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these two parameters as non-nullable
    // bool / int. Casting gives the same values as the implicit conversion of
    // the null defaults, without passing null to a non-nullable parameter.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // convert only if the detected encoding differs, then clean the UTF-8 string
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1372
1373
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The file is read completely with the native file_get_contents() and the
   * result is handed to self::string_has_bom().
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise.
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1384
1385
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively, element by element / property
   * by property (an object is modified in place). Strings get their line
   * endings unified to "\n" and, if they contain bytes >= 0x80, are normalized
   * via \Normalizer. If the normalizer yields nothing usable, the string is run
   * through self::encode('UTF-8', ...) instead. Every other type is returned
   * untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form Form handed to \Normalizer.
   * @param string $leading_combining  Prefix that is put in front of a string which
   *                                   starts with a combining mark, '' disables it.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure 7-bit strings need no normalization
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty placeholder, so that the isset() check below passes
      $n = '-';
    } else {
      $n = \Normalizer::normalize($var, $normalization_form);
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1437
1438
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * The result of the native function is passed through self::filter().
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option Only forwarded if the caller really passed a fourth argument.
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = (func_num_args() < 4)
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
1458
1459
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * The result of the native function is passed through self::filter().
   *
   * @param int   $type
   * @param mixed $definition Only forwarded (together with $add_empty) if the caller
   *                          passed at least two arguments.
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1478
1479
  /**
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * The result of the native function is passed through self::filter().
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option Only forwarded if the caller really passed a third argument.
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = (func_num_args() < 3)
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
1498
1499
  /**
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * The result of the native function is passed through self::filter().
   *
   * @param array $data
   * @param mixed $definition Only forwarded (together with $add_empty) if the caller
   *                          passed at least two arguments.
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $values = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1518
1519
  /**
   * Check if the number of unicode characters is not more than the specified integer.
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if the length reported by self::strlen() is less than
   *                or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1531
1532
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Every key of self::$brokenUtf8ToUtf8 that occurs in the string is replaced
   * by its value.
   *
   * @param string $str
   *
   * @return string The repaired string, '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, built once and reused by later calls
    static $search = null;
    static $replacements = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replacements = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replacements, $str);
  }
1559
1560
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * A string is decoded and re-encoded until another round no longer changes
   * it. An array is processed element by element, keeping its keys.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed
   */
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      $previous = '';

      // repeat until a round leaves the value unchanged
      while ($previous !== $str) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($str));
      }

      return $str;
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::fix_utf8($value);
    }

    return $str;
  }
1588
1589
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support was detected, its answer is used whenever it maps to a
   * clear LTR or RTL class; otherwise (or if its answer is inconclusive) the
   * code point is looked up in the built-in table of right-to-left ranges.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $icuDirection = \IntlChar::charDirection($char);

      // Numeric direction classes as defined by the "IntlChar"-Class.
      if (in_array($icuDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($icuDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // Any other class: fall through to the table lookup below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // Lower block (up to 0x85e): single code points and inclusive ranges.
      $rtlPoints = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710,
          0x7b1,
          0x7fa,
          0x81a, 0x824, 0x828,
          0x85e,
      );
      $rtlRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // Upper block (from 0xfb1d): single code points and inclusive ranges.
      $rtlPoints = array(
          0xfb1d,
          0xfb3e,
          0x10808,
          0x1083c,
          0x1093f,
          0x10a00,
      );
      $rtlRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // Between the two blocks only 0x200f is right-to-left.
      return 'LTR';

    }

    // Single code points are matched strictly, ranges by inclusive bounds.
    if (in_array($c, $rtlPoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1708
1709
  /**
   * Load a data set from "/data/<name>.php" (relative to the directory of this class).
   *
   * @param string $file Base name of the data file, without directory and without ".php".
   *
   * @return bool|string|array|int The value returned by the included file, false if the file does not exist.
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1726
1727
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms (case-insensitive): "U+xxxx", "\uxxxx" or the bare digits,
   * each with 4 to 6 hexadecimal digits.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int|false The code point, or false on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hexadecimal digits are allowed: with the former "[a-z0-9]" class
    // an input such as "zzzz" matched and was silently converted to 0 by intval().
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1744
1745
  /**
   * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      The input string.
   * @param int    $flags    Flags, passed through as-is (null is forwarded, too).
   * @param string $encoding Encoding, passed through as-is.
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1760
1761
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars (only code points from 0x80 upwards are encoded).
   * @param  string $encoding       Encoding of $str; only used when "mb_encode_numericentity()" is available.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point that gets encoded: skip the ASCII range if requested.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding);
      }

      // convmap = (first code point, last code point, offset, mask).
      // The upper bound is the end of the Unicode range (U+10FFFF) so that
      // characters outside of the BMP (e.g. emoji) are encoded as well; the
      // former limit of 0xffff left them in the output unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
1802
1803
  /**
1804
   * UTF-8 version of html_entity_decode()
1805
   *
1806
   * The reason we are not using html_entity_decode() by itself is because
1807
   * while it is not technically correct to leave out the semicolon
1808
   * at the end of an entity most browsers will still interpret the entity
1809
   * correctly. html_entity_decode() does not convert entities without
1810
   * semicolons, so we are left with our own little solution here. Bummer.
1811
   *
1812
   * Convert all HTML entities to their applicable characters
1813
   *
1814
   * INFO: opposite to UTF8::html_encode()
1815
   *
1816
   * @link http://php.net/manual/en/function.html-entity-decode.php
1817
   *
1818
   * @param string $str      <p>
1819
   *                         The input string.
1820
   *                         </p>
1821
   * @param int    $flags    [optional] <p>
1822
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1823
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1824
   *                         <table>
1825
   *                         Available <i>flags</i> constants
1826
   *                         <tr valign="top">
1827
   *                         <td>Constant Name</td>
1828
   *                         <td>Description</td>
1829
   *                         </tr>
1830
   *                         <tr valign="top">
1831
   *                         <td><b>ENT_COMPAT</b></td>
1832
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1833
   *                         </tr>
1834
   *                         <tr valign="top">
1835
   *                         <td><b>ENT_QUOTES</b></td>
1836
   *                         <td>Will convert both double and single quotes.</td>
1837
   *                         </tr>
1838
   *                         <tr valign="top">
1839
   *                         <td><b>ENT_NOQUOTES</b></td>
1840
   *                         <td>Will leave both double and single quotes unconverted.</td>
1841
   *                         </tr>
1842
   *                         <tr valign="top">
1843
   *                         <td><b>ENT_HTML401</b></td>
1844
   *                         <td>
1845
   *                         Handle code as HTML 4.01.
1846
   *                         </td>
1847
   *                         </tr>
1848
   *                         <tr valign="top">
1849
   *                         <td><b>ENT_XML1</b></td>
1850
   *                         <td>
1851
   *                         Handle code as XML 1.
1852
   *                         </td>
1853
   *                         </tr>
1854
   *                         <tr valign="top">
1855
   *                         <td><b>ENT_XHTML</b></td>
1856
   *                         <td>
1857
   *                         Handle code as XHTML.
1858
   *                         </td>
1859
   *                         </tr>
1860
   *                         <tr valign="top">
1861
   *                         <td><b>ENT_HTML5</b></td>
1862
   *                         <td>
1863
   *                         Handle code as HTML 5.
1864
   *                         </td>
1865
   *                         </tr>
1866
   *                         </table>
1867
   *                         </p>
1868
   * @param string $encoding [optional] <p>
1869
   *                         Encoding to use.
1870
   *                         </p>
1871
   *
1872
   * @return string the decoded string.
1873
   */
1874
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Fast exits: empty input, or nothing that could be an entity.
    if ($str === '') {
      return '';
    }

    if (strpos($str, '&') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // Default flags: ENT_HTML5 is only referenced when PHP >= 5.4 was detected.
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Decodes one decimal entity, but leaves it untouched if it stands for a quote.
    $decodeDecimalEntity = function ($matches) {
      $entity = $matches[0];
      $decoded = \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $entity;
      }

      return $decoded;
    };

    // Repeat until a full pass leaves the string unchanged (handles nested encodings).
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (numeric entities without a trailing semicolon get one appended first)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
1926 9
1927 2
  /**
1928
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
1929
   *
1930 9
   * @link http://php.net/manual/en/function.htmlentities.php
1931
   *
1932
   * @param string $str           <p>
1933
   *                              The input string.
1934
   *                              </p>
1935
   * @param int    $flags         [optional] <p>
1936
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
1937
   *                              invalid code unit sequences and the used document type. The default is
1938
   *                              ENT_COMPAT | ENT_HTML401.
1939
   *                              <table>
1940
   *                              Available <i>flags</i> constants
1941 1
   *                              <tr valign="top">
1942
   *                              <td>Constant Name</td>
1943 1
   *                              <td>Description</td>
1944
   *                              </tr>
1945 1
   *                              <tr valign="top">
1946
   *                              <td><b>ENT_COMPAT</b></td>
1947
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
1948
   *                              </tr>
1949
   *                              <tr valign="top">
1950
   *                              <td><b>ENT_QUOTES</b></td>
1951
   *                              <td>Will convert both double and single quotes.</td>
1952
   *                              </tr>
1953
   *                              <tr valign="top">
1954
   *                              <td><b>ENT_NOQUOTES</b></td>
1955
   *                              <td>Will leave both double and single quotes unconverted.</td>
1956
   *                              </tr>
1957
   *                              <tr valign="top">
1958
   *                              <td><b>ENT_IGNORE</b></td>
1959
   *                              <td>
1960 4
   *                              Silently discard invalid code unit sequences instead of returning
1961
   *                              an empty string. Using this flag is discouraged as it
1962 4
   *                              may have security implications.
1963 3
   *                              </td>
1964
   *                              </tr>
1965
   *                              <tr valign="top">
1966 4
   *                              <td><b>ENT_SUBSTITUTE</b></td>
1967
   *                              <td>
1968
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1969
   *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty string.
1970
   *                              </td>
1971
   *                              </tr>
1972
   *                              <tr valign="top">
1973
   *                              <td><b>ENT_DISALLOWED</b></td>
1974
   *                              <td>
1975
   *                              Replace invalid code points for the given document type with a
1976 2
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
1977
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1978 2
   *                              instance, to ensure the well-formedness of XML documents with
1979 2
   *                              embedded external content.
1980 2
   *                              </td>
1981
   *                              </tr>
1982 2
   *                              <tr valign="top">
1983
   *                              <td><b>ENT_HTML401</b></td>
1984 2
   *                              <td>
1985
   *                              Handle code as HTML 4.01.
1986
   *                              </td>
1987 2
   *                              </tr>
1988
   *                              <tr valign="top">
1989 2
   *                              <td><b>ENT_XML1</b></td>
1990 2
   *                              <td>
1991 2
   *                              Handle code as XML 1.
1992
   *                              </td>
1993 1
   *                              </tr>
1994 1
   *                              <tr valign="top">
1995 1
   *                              <td><b>ENT_XHTML</b></td>
1996
   *                              <td>
1997
   *                              Handle code as XHTML.
1998
   *                              </td>
1999
   *                              </tr>
2000
   *                              <tr valign="top">
2001 2
   *                              <td><b>ENT_HTML5</b></td>
2002
   *                              <td>
2003 2
   *                              Handle code as HTML 5.
2004 2
   *                              </td>
2005
   *                              </tr>
2006 2
   *                              </table>
2007
   *                              </p>
2008
   * @param string $encoding      [optional] <p>
2009
   *                              Like <b>htmlspecialchars</b>,
2010
   *                              <b>htmlentities</b> takes an optional third argument
2011
   *                              <i>encoding</i> which defines encoding used in
2012
   *                              conversion.
2013
   *                              Although this argument is technically optional, you are highly
2014
   *                              encouraged to specify the correct value for your code.
2015
   *                              </p>
2016
   * @param bool   $double_encode [optional] <p>
2017
   *                              When <i>double_encode</i> is turned off PHP will not
2018
   *                              encode existing html entities. The default is to convert everything.
2019
   *                              </p>
2020
   *
2021
   *
2022
   * @return string the encoded string.
2023
   * </p>
2024
   * <p>
2025
   * If the input <i>string</i> contains an invalid code unit
2026
   * sequence within the given <i>encoding</i> an empty string
2027
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2028
   * <b>ENT_SUBSTITUTE</b> flags are set.
2029
   */
2030
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of 3 or more bytes that is still
    // present after htmlentities() and map it to its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($encoded) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $encoded);
  }
2058
2059
  /**
2060
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2061
   *
2062 41
   * INFO: Take a look at "UTF8::htmlentities()"
2063 41
   *
2064
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2065 41
   *
2066 41
   * @param string $str           <p>
2067
   *                              The string being converted.
2068 41
   *                              </p>
2069 6
   * @param int    $flags         [optional] <p>
2070 6
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2071
   *                              invalid code unit sequences and the used document type. The default is
2072 41
   *                              ENT_COMPAT | ENT_HTML401.
2073 1
   *                              <table>
2074 1
   *                              Available <i>flags</i> constants
2075
   *                              <tr valign="top">
2076 41
   *                              <td>Constant Name</td>
2077 5
   *                              <td>Description</td>
2078 5
   *                              </tr>
2079
   *                              <tr valign="top">
2080 41
   *                              <td><b>ENT_COMPAT</b></td>
2081
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2082
   *                              </tr>
2083
   *                              <tr valign="top">
2084
   *                              <td><b>ENT_QUOTES</b></td>
2085
   *                              <td>Will convert both double and single quotes.</td>
2086
   *                              </tr>
2087
   *                              <tr valign="top">
2088
   *                              <td><b>ENT_NOQUOTES</b></td>
2089
   *                              <td>Will leave both double and single quotes unconverted.</td>
2090 4
   *                              </tr>
2091
   *                              <tr valign="top">
2092 4
   *                              <td><b>ENT_IGNORE</b></td>
2093
   *                              <td>
2094 4
   *                              Silently discard invalid code unit sequences instead of returning
2095 1
   *                              an empty string. Using this flag is discouraged as it
2096
   *                              may have security implications.
2097
   *                              </td>
2098
   *                              </tr>
2099 4
   *                              <tr valign="top">
2100
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2101
   *                              <td>
2102
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2103
   *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty string.
2104
   *                              </td>
2105
   *                              </tr>
2106 4
   *                              <tr valign="top">
2107
   *                              <td><b>ENT_DISALLOWED</b></td>
2108 4
   *                              <td>
2109
   *                              Replace invalid code points for the given document type with a
2110
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
2111
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2112
   *                              instance, to ensure the well-formedness of XML documents with
2113
   *                              embedded external content.
2114
   *                              </td>
2115
   *                              </tr>
2116
   *                              <tr valign="top">
2117
   *                              <td><b>ENT_HTML401</b></td>
2118
   *                              <td>
2119
   *                              Handle code as HTML 4.01.
2120
   *                              </td>
2121
   *                              </tr>
2122 5
   *                              <tr valign="top">
2123
   *                              <td><b>ENT_XML1</b></td>
2124 5
   *                              <td>
2125 5
   *                              Handle code as XML 1.
2126 5
   *                              </td>
2127
   *                              </tr>
2128 5
   *                              <tr valign="top">
2129
   *                              <td><b>ENT_XHTML</b></td>
2130 5
   *                              <td>
2131 5
   *                              Handle code as XHTML.
2132 5
   *                              </td>
2133
   *                              </tr>
2134 5
   *                              <tr valign="top">
2135
   *                              <td><b>ENT_HTML5</b></td>
2136 5
   *                              <td>
2137 1
   *                              Handle code as HTML 5.
2138
   *                              </td>
2139 1
   *                              </tr>
2140 1
   *                              </table>
2141 1
   *                              </p>
2142
   * @param string $encoding      [optional] <p>
2143 1
   *                              Defines encoding used in conversion.
2144 1
   *                              </p>
2145
   *                              <p>
2146 5
   *                              For the purposes of this function, the encodings
2147
   *                              ISO-8859-1, ISO-8859-15,
2148
   *                              UTF-8, cp866,
2149
   *                              cp1251, cp1252, and
2150
   *                              KOI8-R are effectively equivalent, provided the
2151
   *                              <i>string</i> itself is valid for the encoding, as
2152
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2153
   *                              the same positions in all of these encodings.
2154
   *                              </p>
2155
   * @param bool   $double_encode [optional] <p>
2156
   *                              When <i>double_encode</i> is turned off PHP will not
2157
   *                              encode existing html entities, the default is to convert everything.
2158 6
   *                              </p>
2159
   *
2160 6
   * @return string The converted string.
2161
   * </p>
2162
   * <p>
2163
   * If the input <i>string</i> contains an invalid code unit
2164
   * sequence within the given <i>encoding</i> an empty string
2165
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2166
   * <b>ENT_SUBSTITUTE</b> flags are set.
2167
   */
2168
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Only non-default encodings need to be normalized before the native call.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2176 1
2177
  /**
   * Tells whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    return extension_loaded('iconv') === true;
  }
2186
2187
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hexadecimal digits are lower-case and left-padded with zeros to at
   * least four digits.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix Prefix that is put in front of the hexadecimal digits.
   *
   * @return   string The code point, or empty string on failure.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only non-negative, purely decimal input is accepted.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2209
2210
  /**
   * Tells whether "IntlChar" can be used: requires the PHP 7.0 check of
   * "Bootup::is_php()" to pass and the "IntlChar" class to exist.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intlChar_loaded()
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar');
  }
2219 11
2220
  /**
   * Checks whether the "intl" extension is loaded on the server.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
2229
2230
  /**
   * Alias for "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str The string to check.
   *
   * @return boolean Whatever UTF8::is_ascii() returns for $str.
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2243 2
2244 2
  /**
   * Alias for "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever UTF8::is_base64() returns for $str.
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2257
2258
  /**
   * Alias for "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str The input to check.
   *
   * @return bool Whatever UTF8::is_binary() returns for $str.
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2271
2272
  /**
   * Alias for "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr The string to compare against the known byte order marks.
   *
   * @return boolean Whatever UTF8::is_bom() returns for $utf8_chr.
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2285
2286
  /**
   * Alias for "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str The string to check.
   *
   * @return boolean Whatever UTF8::is_html() returns for $str.
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2299
2300
  /**
   * Alias for "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever UTF8::is_json() returns for $str.
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2313
2314
  /**
   * Alias for "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2327
2328
  /**
   * Alias for "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2341 2
2342 2
  /**
   * Alias for "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    The string to check.
   * @param bool   $strict Passed through unchanged to UTF8::is_utf8().
   *
   * @return bool Whatever UTF8::is_utf8() returns for the given arguments.
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2356 1
2357 1
  /**
   * Checks whether a string consists of 7 bit ASCII bytes only.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if no byte in the range 0x80-0xFF was found<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // A single byte with the high bit set is enough to rule out pure ASCII.
    return preg_match('/[\x80-\xFF]/', $str) !== 1;
  }
2369 1
2370 1
  /**
   * Tells whether the given string is base64 encoded.
   *
   * The check is a round trip: the string is decoded in strict mode and encoded
   * again; only an exact match with the input counts. An empty string is never
   * considered base64.
   *
   * @param string $str
   *
   * @return bool Whether or not $str is base64 encoded
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
2391
2392
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input is considered binary when it is
   * - a non-empty string made up only of the characters "0" and "1", or
   * - a string containing at least one NUL byte ("\x00").
   *
   * Note: an earlier third check, substr_count($input, '^ -~') / strlen($input) > 0.3,
   * was removed. substr_count() counts the literal four-byte substring "^ -~", so the
   * ratio could never exceed 0.25 and the check was never true. It was not replaced by
   * a real "non-printable bytes" ratio, because that would classify ordinary multi-byte
   * UTF-8 text as binary.
   *
   * @param mixed $input Cast to string before it is inspected.
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    // Bit-strings such as "0101" are treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    return strpos($input, "\x00") !== false;
  }
2416 2
2417 8
  /**
   * Check if the file is binary.
   *
   * Only the first 512 bytes of the file are read and handed to UTF8::is_binary().
   * A file that cannot be opened or read is treated like empty content.
   *
   * @param string $file Path (or stream URL) that is passed to fopen().
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false, so the result must be checked
      // before it is used as a handle.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $data = fread($fp, 512);
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Reached e.g. when an error handler converts warnings into exceptions.
      $block = '';
    }

    // Close the handle on every path, including after an exception during fread().
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2436 8
2437 9
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * The string is compared strictly against every key of self::$bom, so it must be
   * exactly a BOM and nothing else.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
   */
  public static function is_bom($str)
  {
    return in_array($str, array_keys(self::$bom), true);
  }
2456
2457
  /**
   * Checks whether the string contains at least one HTML-like tag.
   *
   * An opening, closing or self-closing tag (optionally with attributes) matching
   * the pattern below is sufficient. An empty string never matches.
   *
   * @param string $str
   *
   * @return boolean
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $str) === 1;
  }
2483
2484
  /**
   * Try to check if "$str" is a JSON string.
   *
   * The string counts as JSON when it decodes without error (via UTF8::json_decode())
   * into an object or an array. Top-level scalars such as "123" or "true" are not
   * accepted. An empty string is never JSON.
   *
   * @param string $str
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // A top-level JSON array ("[1,2]") decodes to a PHP array, not an object, so
    // both container types have to be accepted.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
2509
2510
  /**
   * Check if the string is UTF-16.
   *
   * After removing a BOM, the string is only examined further when UTF8::is_binary()
   * reports it as binary. Each byte order is then scored: the string is converted to
   * UTF-8, back, and to UTF-8 again; if that round trip is stable, the score is
   * increased for every character of the result that in_array() finds in the
   * count_chars() data of the input. The byte order with the higher score wins.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16 (or both byte orders score equally),
   *                   1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    $scores = array();

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $scores[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): count_chars() is not visible from here. If it returns a
      // "char => count" map, in_array() compares each character against the counts
      // rather than against the characters of $str - confirm this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
2565
2566
  /**
   * Check if the string is UTF-32.
   *
   * After removing a BOM, the string is only examined further when UTF8::is_binary()
   * reports it as binary. Each byte order is then scored: the string is converted to
   * UTF-8, back, and to UTF-8 again; if that round trip is stable, the score is
   * increased for every character of the result that in_array() finds in the
   * count_chars() data of the input. The byte order with the higher score wins.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32 (or both byte orders score equally),
   *                   1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    $scores = array();

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $scores[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): count_chars() is not visible from here. If it returns a
      // "char => count" map, in_array() compares each character against the counts
      // rather than against the characters of $str - confirm this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
2621 1
2622
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8.
   *
   * An empty string is considered valid. The byte-wise check rejects stray
   * continuation bytes, non-shortest forms, surrogates (U+D800..U+DFFF), code points
   * above U+10FFFF, lead bytes of 5/6-octet sequences, and multi-octet sequences that
   * are cut off - including one that is cut off by the end of the string.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param  string $str    The string to be checked.
   * @param  bool   $strict Check also if the string is not UTF-16 or UTF-32.
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the "/u" regex shortcut is taken when pcre_utf8_support() does
    // NOT return true, which reads as inverted relative to the helper's name. The
    // helper is not visible from here, so the condition is kept unchanged - confirm.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a sequence.
        if (0 === (0x80 & $in)) {
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } else {
          // Stray continuation octet, lead octet of an (always illegal) 5/6 octet
          // sequence, or 0xFE/0xFF: none of these can start a valid character.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only continuation octets (10xxxxxx) are legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the decoded code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a sequence.
    return $mState === 0;
  }
2766
2767
  /**
2768
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2769
   * Decodes a JSON string
2770
   *
2771
   * @link http://php.net/manual/en/function.json-decode.php
2772
   *
2773
   * @param string $json    <p>
2774
   *                        The <i>json</i> string being decoded.
2775 2
   *                        </p>
2776
   *                        <p>
2777
   *                        This function only works with UTF-8 encoded strings.
2778 2
   *                        </p>
2779
   *                        <p>PHP implements a superset of
2780 2
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2781 2
   *                        only supports these values when they are nested inside an array or an object.
2782 1
   *                        </p>
2783 1
   * @param bool   $assoc   [optional] <p>
2784
   *                        When <b>TRUE</b>, returned objects will be converted into
2785 2
   *                        associative arrays.
2786
   *                        </p>
2787 2
   * @param int    $depth   [optional] <p>
2788 2
   *                        User specified recursion depth.
2789 2
   *                        </p>
2790
   * @param int    $options [optional] <p>
2791 2
   *                        Bitmask of JSON decode options. Currently only
2792
   *                        <b>JSON_BIGINT_AS_STRING</b>
2793
   *                        is supported (default is to cast large integers as floats)
2794
   *                        </p>
2795
   *
2796
   * @return mixed the value encoded in <i>json</i> in appropriate
2797
   * PHP type. Values true, false and
2798
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
2799
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
2800
   * <i>json</i> cannot be decoded or if the encoded
2801
   * data is deeper than the recursion limit.
2802
   */
2803
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is run through self::filter() before the native decoder sees it.
    $filtered = self::filter($json);

    // $options is only handed to the native function when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
2815
2816
  /**
2817
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
2818
   * Returns the JSON representation of a value.
2819
   *
2820
   * @link http://php.net/manual/en/function.json-encode.php
2821
   *
2822
   * @param mixed $value   <p>
2823
   *                       The <i>value</i> being encoded. Can be any type except
2824
   *                       a resource.
2825
   *                       </p>
2826
   *                       <p>
2827
   *                       All string data must be UTF-8 encoded.
2828
   *                       </p>
2829
   *                       <p>PHP implements a superset of
2830
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
2831
   *                       only supports these values when they are nested inside an array or an object.
2832
   *                       </p>
2833
   * @param int   $options [optional] <p>
2834
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
2835
   *                       <b>JSON_HEX_TAG</b>,
2836
   *                       <b>JSON_HEX_AMP</b>,
2837
   *                       <b>JSON_HEX_APOS</b>,
2838
   *                       <b>JSON_NUMERIC_CHECK</b>,
2839
   *                       <b>JSON_PRETTY_PRINT</b>,
2840
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
2841
   *                       <b>JSON_FORCE_OBJECT</b>,
2842
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
2843
   *                       constants is described on
2844
   *                       the JSON constants page.
2845
   *                       </p>
2846
   * @param int   $depth   [optional] <p>
2847
   *                       Set the maximum depth. Must be greater than zero.
2848
   *                       </p>
2849
   *
2850
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
2851
   */
2852
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is run through self::filter() before the native encoder sees it.
    $filtered = self::filter($value);

    // $depth is only handed to the native function when Bootup reports PHP >= 5.5.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
2864
2865
  /**
   * Makes string's first char lowercase.
   *
   * An empty input yields an empty string. Results of UTF8::substr() are cast to
   * string before use, so a non-string result (e.g. false) is treated as "".
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $first = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($first) . $rest;
  }
2876
2877 17
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without a character list (INF, "", null, false, ...) all leading separator
   * (\pZ) and "other"/control (\pC) characters are removed. Otherwise the list is
   * turned into a regex character class by UTF8::rxClass() and leading characters
   * matching it are removed. The list "0" is a valid list and strips leading zeros.
   *
   * @param  string $str   The string to be trimmed
   * @param  string $chars Optional characters to be stripped
   *
   * @return string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy in PHP but is a perfectly valid character list, so it must not
    // fall into the default branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    $chars = self::rxClass($chars);

    return preg_replace("/^{$chars}+/u", '', $str);
  }
2902
2903 14
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is joined into one string first. When no code points are
   * found (e.g. for an empty string) an empty string is returned instead of calling
   * max() on an empty list.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point, or "" if there is none.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codepoints = self::codepoints($arg);

    // The native max() warns (PHP < 8) or throws (PHP >= 8) for an empty array.
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(max($codepoints));
  }
2918
2919 17
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The per-character sizes come from UTF8::chr_size_list(); an empty list gives 0.
   *
   * @param  string $str The original Unicode string.
   *
   * @return int Max byte lengths of the given chars.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
2936
2937
  /**
   * Checks whether the "mbstring" extension is loaded on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is
   * set to "UTF-8".
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
2952
2953
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is joined into one string first. When no code points are
   * found (e.g. for an empty string) an empty string is returned instead of calling
   * min() on an empty list.
   *
   * @param  mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return string The character with the lowest code point, or "" if there is none.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codepoints = self::codepoints($arg);

    // The native min() warns (PHP < 8) or throws (PHP >= 8) for an empty array.
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(min($codepoints));
  }
2968
2969
  /**
   * Alias for "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding The encoding name to normalize.
   *
   * @return string|false Whatever UTF8::normalize_encoding() returns for $encoding.
   */
  public static function normalizeEncoding($encoding)
  {
    return self::normalize_encoding($encoding);
  }
2982
2983
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and names listed in self::$iconvEncoding are returned unchanged. Any
   * other name is upper-cased; if its alphanumeric form (whitespace is kept) is a
   * known alias, the canonical name from the alias table is used. Results are
   * cached per input for the lifetime of the request.
   *
   * @param  string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.;
   *                      false when $encoding is empty (falsy).
   */
  public static function normalize_encoding($encoding)
  {
    static $cache = array();

    if (!$encoding) {
      return false;
    }

    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upper = strtoupper($encoding);
    // Lookup key: the upper-cased name without punctuation such as "-" or "_".
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $normalized = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upper;

    $cache[$encoding] = $normalized;

    return $normalized;
  }
3039 2
3040 2
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the class table self::$utf8MSWord found in the input is replaced
   * by the value stored for that key.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // the search / replacement lists are derived from the table once per request
    static $search = null;
    static $replacements = null;

    if (null === $search) {
      $search = array_keys(self::$utf8MSWord);
      $replacements = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replacements, $str);
  }
3059
3060
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable found in the input is replaced by a plain
   * space; unless asked otherwise, the entries of self::$bidiUniCodeControlsTable
   * are removed first.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache key is derived from the very same strict comparison that decides
    // whether the NO-BREAK SPACE entry is dropped. Otherwise a truthy non-bool
    // (e.g. 1) would cache an unfiltered table under the "keep NBSP" key and
    // break every later call that passes a real true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3098
3099
  /**
   * Format a number with grouped thousands.
   *
   * The number is formatted with the ASCII separators "." and "," first; both are
   * then swapped for the requested separators in one single pass. This way the
   * separators may have any length (including multi-byte characters) and may
   * contain "." or "," themselves without being replaced a second time.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @deprecated
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $formatted = number_format($number, $decimals, '.', ',');

    // strtr() with an array never re-scans text it has already substituted
    return strtr(
        $formatted,
        array(
            '.' => (string)$dec_point,
            ',' => (string)$thousands_sep,
        )
    );
  }
3136
3137
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param  string $chr The character of which to calculate code point.
   *
   * @return int Unicode code point of the given character,<br />
   *         0 for empty input. If the continuation bytes announced by the first
   *         byte are missing, the value of a shorter decoding branch (at worst the
   *         plain value of the first byte) is returned.
   */
  public static function ord($chr)
  {
    // '0' is falsy in PHP but is a real character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($chr);
      if ($codePoint) {
        return $codePoint;
      }
      // a falsy result falls through to the manual decoding below
    }

    // manual decoding: look at no more than the first four bytes (indexes start at 1)
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    return $lead;
  }
3181
3182
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never sets variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        The result will be returned into this reference parameter.
   *                        </p>
   *
   * @return bool will return false if php can't parse the string and we haven't any $result
   */
  public static function parse_str($str, &$result)
  {
    // the input goes through UTF8::clean() before it is parsed
    $cleaned = self::clean($str);

    $parsed = \mb_parse_str($cleaned, $result);

    return $parsed !== false && !empty($result);
  }
3211
3212
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier matches the empty string (result: 1)
    // when the modifier is usable; warnings are silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return $matched ? true : false;
  }
3222
3223
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a string of decimal digits is used
   * as the code point itself, a string of hexadecimal digits is converted via
   * UTF8::hex_to_int(), anything else is handed to UTF8::ord().
   *
   * @param  mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param  mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return array The characters from start to end (built with UTF8::chr()),<br />
   *               an empty array if a boundary is empty or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start and end with the same rules, in that order
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($codePoints[0], $codePoints[1])
    );
  }
3269
3270
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM string => byte length) is tested once, in table
   * order, against the start of the string and cut off if it matches.
   *
   * @param string $str
   *
   * @return string
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (strpos($str, $bomString) !== 0) {
        continue;
      }

      $str = substr($str, $bomByteLength);
    }

    return $str;
  }
3287
3288 1
  /**
   * Alias of UTF8::remove_bom().
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string The value returned by UTF8::remove_bom() for $str.
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3301
3302
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly consecutive repetitions of a search string is collapsed
   * into one single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // The group always captures exactly the quoted literal, so '$1' re-inserts
        // the item once. Using $item itself as replacement would let items such as
        // '$0' or '\\1' be interpreted as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3324
3325
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the url-encoded form ("%00" etc.) of these characters.
   * @param  string $replacement
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding may be upper or lower case -> match case-insensitively
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left: a removal can bring the pieces of a new match together
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3358 14
3359
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str
   * @param string $unknown The text that is inserted for every diamond question mark.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the character is searched both as byte-escape and as literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replacements = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replacements, $str);
  }
3381 1
3382
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars, every trailing character of the Unicode categories "separator"
   * (\pZ) and "other" (\pC) is removed.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // '0' is falsy in PHP but a perfectly valid character to strip, so it must not
    // select the default behaviour.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    $charClass = self::rxClass($chars);

    return preg_replace("/{$charClass}+$/u", '', $str);
  }
3407
3408
  /**
   * Build a regex fragment that matches any one of the characters of $s.
   *
   * The fragment is a bracket expression; characters that can not go into the
   * bracket expression are added as alternatives of a non-capturing group.
   * The result is memoized per ($s . $class).
   *
   * @param string $s     The characters to match.
   * @param string $class Initial content of the bracket expression.
   *
   * @return string
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 collects the bracket expression, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes long: quote for use inside the pattern
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3455
3456
  /**
   * WARNING: Echoes the values of the internal support table (self::$support),
   *          each followed by "\n<br>", e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
3465
3466 1
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $char           The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars.
   *
   * @return   string The HTML numbered entity,<br />
   *                  the unchanged character if it is ASCII and $keepAsciiChars is true,<br />
   *                  an empty string for empty input.
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // '0' is falsy in PHP but is a real character that has to be encoded
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
3490
3491
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE with UTF-8 support is used).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    $chars = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "." with the modifiers "u" and "s" matches every single character, newlines included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the length announced in the
      // lead byte; bytes that do not form a complete sequence are skipped

      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // two byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // three byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // four byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }

        $i++;
      }
    }

    if ($length > 1) {
      // glue every $length characters together
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3573
3574 3
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (for binary input only), ASCII,
   * UTF-8, "\mb_detect_encoding()" with a fixed candidate list and finally an
   * "iconv()" round-trip for every entry of self::$iconvEncoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      if (self::is_utf16($str) === 1) {
        return 'UTF-16LE';
      }

      if (self::is_utf16($str) === 2) {
        return 'UTF-16BE';
      }

      if (self::is_utf32($str) === 1) {
        return 'UTF-32LE';
      }

      if (self::is_utf32($str) === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $candidates = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $detected = \mb_detect_encoding($str, $candidates, true);
    if ($detected) {
      return $detected;
    }

    //
    // 5.) check via "iconv()"
    //
    // an encoding is accepted if converting the string from that encoding to
    // itself leaves the string unchanged (compared via md5)

    $fingerprint = md5($str);
    foreach (self::$iconvEncoding as $candidate) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($candidate, $candidate, $str);

      if (md5($roundTrip) === $fingerprint) {
        return $candidate;
      }
    }

    return false;
  }
3665
3666
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search strings are turned into quoted, case-insensitive unicode patterns;
   * the replacements are always inserted literally.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string, or an array with one
   *                       replacement per search entry.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must not match anything: this pattern can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // Escape "\" and "$" so that preg_replace() inserts the replacement literally
    // instead of reading "$1" or "\1" as a back-reference.
    if (is_array($replace)) {
      foreach ($replace as $key => $value) {
        $replace[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replace = addcslashes((string)$replace, '\\$');
    }

    $replaced = 0;
    $subject = preg_replace($patterns, $replace, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
3709
3710
  /**
   * Limit the number of characters in a string without cutting a word in two.
   *
   * A string that is longer than $length is cut at the last space within the
   * first $length characters; if there is no such space, it is cut after
   * $length - 1 characters. $strAddOn is appended to every shortened string.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a word
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = self::substr($str, 0, $length);

    // drop the (possibly incomplete) last word
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3750
3751
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The string is returned unchanged if $pad_length is not an integer, is not
   * greater than 0, is smaller than the length of the string, or if $pad_string
   * is empty.
   *
   * @param    string $str        The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with (this would also be a division by zero below)
    if (!$ps_length) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // like the native str_pad(): the left side gets the smaller half
        $preLength = (int)floor($diff / 2);
        $postLength = $diff - $preLength;

        $pre = str_repeat($pad_string, (int)ceil($preLength / $ps_length));
        $pre = self::substr($pre, 0, $preLength);
        $post = str_repeat($pad_string, (int)ceil($postLength / $ps_length));
        $post = self::substr($post, 0, $postLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
3796
3797
  /**
   * Repeat a string.
   *
   * The input goes through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);

    return str_repeat($filtered, $multiplier);
  }
3821
3822
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation; $count is filled by the native function
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
3855
3856
  /**
   * Randomly reorder the characters of a string.
   *
   * The string is broken up with UTF8::split(), the resulting pieces are
   * shuffled with PHP's shuffle() and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
3871
3872
  /**
   * Sort the characters of a string by their code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique If true, every code point is kept only once.
   * @param    bool   $desc   If true, the code points are sorted in descending order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
3897
3898
  /**
   * Split a string into an array of chunks.
   *
   * The string is cut into grapheme clusters (Grapheme::GRAPHEME_CLUSTER_RX)
   * and $len consecutive clusters are joined into one array entry.
   *
   * @param string $str The input string.
   * @param int    $len Number of grapheme clusters per chunk; values below 1
   *                    are delegated to the native str_split().
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;

    // invalid length: let the native function decide how to react
    if ($len < 1) {
      return str_split($str, $len);
    }

    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // first grapheme of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
        continue;
      }

      $chunks[$chunkIndex] .= $grapheme;
    }

    return $chunks;
  }
3936
3937 2
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are rendered as one big-endian bit string without
   * leading zeros (an empty string or a string of NUL bytes yields "0").
   *
   * @param  string $str The input string.
   *
   * @return string The bits ("0"/"1") of the input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $value = unpack('H*', $str);
    $hex = (string)$value[1];

    // base_convert() works on floats internally and silently loses precision
    // for inputs longer than a few bytes, so every hex digit is converted on its own
    $bits = '';
    $hexLength = strlen($hex);
    for ($i = 0; $i < $hexLength; ++$i) {
      $bits .= sprintf('%04b', hexdec($hex[$i]));
    }

    // keep the output format of base_convert(): no leading zeros, but at least "0"
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
3952
3953
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     The input string.
   * @param string $unknown Forwarded unchanged as second argument of UTF8::to_ascii().
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    $ascii = self::to_ascii($str, $unknown);

    return $ascii;
  }
3967
3968
  /**
   * Count (or collect) the words of a UTF-8 string.
   *
   * A word is a run of letters (plus the characters from $charlist); dashes and
   * apostrophes ("'", "’") between two such runs keep them in the same word.
   *
   * @param string $str      The input string.
   * @param int    $format   <strong>0</strong> => return a number of words<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist Additional characters that count as part of a word.
   *
   * @return array|int The number of words, or the list of words (depending on $format)
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');

    // odd indexes hold the words, even indexes hold the text between them
    $pieces = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    $pieceCount = count($pieces);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $pieceCount; $index += 2) {
        $words[] = $pieces[$index];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      $position = self::strlen($pieces[0]);
      for ($index = 1; $index < $pieceCount; $index += 2) {
        $words[$position] = $pieces[$index];
        $position += self::strlen($pieces[$index]) + self::strlen($pieces[$index + 1]);
      }

      return $words;
    }

    return ($pieceCount - 1) / 2;
  }
4010 2
4011 2
  /**
   * Compare two strings without regard to case.
   *
   * INFO: Both strings are case-folded via UTF8::strtocasefold() and then compared with UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4027 2
4028 2
  /**
   * Compare two strings, respecting case.
   *
   * Identical strings short-circuit to 0; otherwise both strings are brought
   * into Unicode normalization form D before the byte-wise comparison.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4045 2
4046
  /**
   * Get the length of the leading part of a string that contains none of the given characters.
   *
   * @param string $str      The input string.
   * @param string $charList The characters to stop at.
   * @param int    $offset   Start position; used together with $length to cut $str via UTF8::substr().
   * @param int    $length   Length of the part to inspect.
   *
   * @return int|null The length of the leading segment, or null if $charList is empty.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only cut the string when a non-default window was requested
    $needsSlice = $offset || $length !== 2147483647;
    $str = $needsSlice ? (string)self::substr($str, $offset, $length) : (string)$str;

    $matches = array();
    if (!preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      // no character of the list found: the whole string counts
      return self::strlen($str);
    }

    return self::strlen($matches[1]);
  }
4075 2
4076
  /**
   * Build a string out of a list of code points.
   *
   * INFO: counterpart of UTF8::codepoints(); every entry is converted with UTF8::chr()
   *
   * @param  array $array Integer or Hexadecimal codepoints
   *
   * @return string UTF-8 encoded string
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4097
4098
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str The input string.
   *
   * @return bool
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
4111
4112
  /**
   * Check whether a string begins with a "BOM" (Byte Order Mark Character).
   *
   * Every key of self::$bom is tried as a prefix of the string.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4129
4130
  /**
   * Run the string through UTF8::clean() and strip HTML and PHP tags from it.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags that should be kept, forwarded unchanged to the native strip_tags().
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are always stripped; this can not be
   *                               changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4156
4157
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching;
   *                           null (the default) is treated as 0.
   *                           </p>
   * @param string  $encoding  The charset handed to "\mb_stripos()"; anything but 'UTF-8'
   *                           is passed through UTF8::normalize_encoding() first.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or if haystack / needle is empty).
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // mb_stripos() expects an integer offset: passing the default null through
    // is deprecated for internal functions, so it is normalized like in UTF8::strpos()
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4202 1
4203
  /**
   * Case-insensitive search for the first occurrence of needle; returns the matching part of the string.
   *
   * @param string $str           The string to search in.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Forwarded to "\mb_stristr()": if true, the part before the
   *                              first occurrence is returned instead.
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is empty or not found.
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle .= '';

    if ($needle === '') {
      return false;
    }

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
4220
4221
  /**
   * Get the number of characters of a string (not the number of bytes).
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being measured.
   * @param string  $encoding  The charset used for the "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for UTF-8)
   *
   * @return int The number of characters in $str for the given encoding (a multi-byte
   *             character counts as 1); 0 for an empty string.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    // INFO: boolean values are only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8') {
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding);
    }

    // for these charsets the native byte count is returned
    if (in_array($encoding, array('ASCII', 'CP850'))) {
      return strlen($str);
    }

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
4260 1
4261 1
  /**
   * Compare two strings case-insensitively using a "natural order" algorithm.
   *
   * INFO: Both strings are case-folded via UTF8::strtocasefold() and then compared with UTF8::strnatcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4277 5
4278 5
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: Identical strings short-circuit to 0; otherwise both strings are passed through
   *       UTF8::strtonatfold() and compared with the native strnatcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4300
4301
  /**
   * Compare the first n characters of two strings without regard to case.
   *
   * INFO: Both strings are case-folded via UTF8::strtocasefold() and then compared with UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4324
4325
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to $len characters with UTF8::substr() and then
   * compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may return false; UTF8::strcmp() only deals with strings,
    // so the result is cast explicitly (same handling as in UTF8::strcspn())
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
4351
4352 42
  /**
   * Search a string for the first occurrence of any character out of a set.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string in which the characters are looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          The characters to look for (case sensitive).
   *                          </p>
   *
   * @return string|false The rest of the string starting at the first character found,<br />
   *                      or false if none is found (or if haystack / char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    $found = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // byte offset of the matched character; the tail from there on is the result
    $bytePosition = strpos($haystack, $found[0]);

    return substr($haystack, $bytePosition);
  }
4381
4382
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              A non-negative integer is converted into a character via UTF8::chr().
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  The charset handed to "\mb_strpos()" (only used when mbstring is available).
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle
    // INFO: this check has to run before the string cast, otherwise it can never match
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalize_encoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls grapheme_strpos() - confirm this is intended
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: search byte-wise and translate the byte position into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4463
4464
  /**
   * Find the last occurrence of needle in haystack and return the matching part of haystack.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Selects the portion of haystack that is returned.
   *                         If true: everything from the beginning up to the last occurrence of needle.
   *                         If false: everything from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; anything but 'UTF-8' is passed
   *                         through UTF8::normalize_encoding() first.
   *                         </p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding);

    return \mb_strrchr($haystack, $needle, $part, $charset);
  }
4499
4500 36
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack      The string to search in.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Forwarded unchanged to UTF8::strstr().
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle);

    return $result;
  }
4515 35
4516 35
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack      The string to search in.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Forwarded unchanged to UTF8::stristr().
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle);

    return $result;
  }
4531
4532
  /**
   * Reverse the order of the characters of a string.
   *
   * The string is broken up with UTF8::split() and the pieces are joined in
   * reverse order.
   *
   * @param  string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
4549
4550
  /**
   * Find the last occurrence of needle in haystack (case insensitive) and return the matching part of haystack.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Selects the portion of haystack that is returned.
   *                         If true: everything from the beginning up to the last occurrence of needle.
   *                         If false: everything from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; anything but 'UTF-8' is passed
   *                         through UTF8::normalize_encoding() first.
   *                         </p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding);

    return \mb_strrichr($haystack, $needle, $part, $charset);
  }
4585 2
4586 1
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * Haystack and needle are lower-cased via UTF8::strtolower() and then
   * handed to UTF8::strrpos().
   *
   * @param string  $haystack  The string to look in
   * @param string  $needle    The string to look for
   * @param int     $offset    (Optional) Number of characters to ignore in the beginning or end
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   */
  public static function strripos($haystack, $needle, $offset = 0, $cleanUtf8 = false)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset, $cleanUtf8);
  }
4601
4602
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being searched.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              A non-negative integer is passed through chr() first.
   *                              </p>
   * @param int        $offset    [optional] Cast to int and handed to the underlying search function.
   *                              In the pure-PHP fallback a positive value skips that many leading
   *                              characters and a negative value cuts that many trailing characters.
   * @param boolean    $cleanUtf8 [optional] Run clean() over haystack and needle before searching.
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string.<br />
   *                   <strong>false</strong> if haystack or needle is empty, or if needle is not found.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is converted into a character first
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    // nothing to search in, or nothing to search for
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // the extension-backed searches below are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: trim the haystack according to the offset, search byte-wise,
    // then translate the byte position into a character position
    $shift = 0;
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
      $shift = $offset;
    } elseif ($offset < 0) {
      // negative offset not supported in PHP strpos(), so the tail is cut off instead
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // characters in front of the match + characters skipped by a positive offset
    return $shift + self::strlen(substr($haystack, 0, $bytePos));
  }
4678 1
4679 12
  /**
   * Measure the leading run of $str that consists only of characters from $mask.
   *
   * @param string $str    The string to examine
   * @param string $mask   The allowed characters (turned into a regex character class via rxClass())
   * @param int    $offset [optional] Start position; a non-zero value slices $str via substr() first
   * @param int    $length [optional] Slice length; any value other than 2147483647 slices $str via substr() first
   *
   * @return int The strlen() of the matching prefix, or 0 if $str does not start with a mask character.
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $needsSlice = $offset || 2147483647 !== $length;
    if ($needsSlice) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $hits)) {
      return 0;
    }

    return self::strlen($hits[0]);
  }
4698 1
4699
  /**
   * Return the part of haystack starting at the first occurrence of needle.
   *
   * Thin wrapper around grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack in front of the
   *                              first occurrence of the needle is returned (excluding the needle).
   *                              </p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
4721 1
4722
  /**
   * Unicode transformation for case-less matching.
   *
   * The common case-folding replacements are always applied; the full
   * case-folding table is applied in addition when $full is truthy. The result
   * is then passed through clean() and strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str
   * @param bool   $full <b>true</b> === replace full case folding chars + strtolower,<br />
   *                     <b>false</b> use only $commonCaseFold + strtolower
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built once and then kept for later calls
    static $foldSearch = null;
    static $foldReplace = null;
    static $fullFoldTable = null;

    if ($foldSearch === null) {
      $foldSearch = array_keys(self::$commonCaseFold);
      $foldReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($foldSearch, $foldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search strings, index 1 the matching replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
4760 2
4761 2
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>The string being lowercased.</p>
   * @param string $encoding [optional] Anything other than the literal 'UTF-8' is run through
   *                         normalize_encoding() first.
   *
   * @return string The lower-cased string as produced by mb_strtolower(); '' for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtolower($str, $charset);
  }
4786
4787
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed to NFD and every run of non-spacing marks
   * (\p{Mn}) is removed afterwards.
   *
   * @param string $s
   *
   * @return string
   */
  private static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
4798 1
4799
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>The string being uppercased.</p>
   * @param string $encoding [optional] Anything other than the literal 'UTF-8' is run through
   *                         normalize_encoding() first.
   *
   * @return string The upper-cased string as produced by mb_strtoupper(); '' for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtoupper($str, $charset);
  }
4823
4824
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           With $to given: the items to replace, either as one string
   *                           (split via str_split()) or as a list with one entry per item.<br />
   *                           With $to omitted: an array of "search => replace" pairs that is
   *                           handed to the native strtr() as-is.
   *                           </p>
   * @param string|array $to   [optional] <p>
   *                           The replacements, position by position matching $from; either as one
   *                           string (split via str_split()) or as a list with one entry per item.
   *                           </p>
   *
   * @return string The result of the native strtr() for $str and the resulting replacement map.
   *                If $from and $to differ in size, the surplus entries of the longer one are ignored.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // str_split() is only meant for strings; a list already has one entry per item
      $from = is_array($from) ? $from : self::str_split($from);
      $to = is_array($to) ? $to : self::str_split($to);

      // pair the entries position by position, ignoring the surplus of the longer list
      $pairCount = min(count($from), count($to));

      if ($pairCount > 0) {
        $from = array_combine(
            array_slice($from, 0, $pairCount),
            array_slice($to, 0, $pairCount)
        );
      } else {
        // array_combine() fails for empty arrays before PHP 5.4; an empty map translates nothing
        $from = array();
      }
    }

    return strtr($str, $from);
  }
4863 1
4864
  /**
   * Return the width of a string.
   *
   * Thin wrapper around mb_strwidth() with the encoding fixed to 'UTF-8'.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
4875 1
4876
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string.
   *                           If omitted, the character count of str is used.
   *                           </p>
   * @param string  $encoding  [optional] Only used on the mbstring path; anything other than 'UTF-8'
   *                           (or a boolean) is run through normalize_encoding() first.
   * @param boolean $cleanUtf8 [optional] Clean non UTF-8 chars from the string
   *
   * @return string|false The sub-string specified by the start and length parameters,<br />
   *                      '' for empty input,<br />
   *                      <strong>false</strong> if start is greater than the character count of str.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check and the default length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $charCount) {
      return false;
    }

    $length = $length === null ? $charCount : (int)$length;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {

      // INFO: boolean values are only accepted as a fallback for old versions
      if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
        $charset = self::normalize_encoding($encoding);
      } else {
        $charset = 'UTF-8';
      }

      return \mb_substr($str, $start, $length, $charset);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into characters, pick the requested range, join it to a string again
    $chars = self::split($str);

    return implode(array_slice($chars, $start, $length));
  }
4954
4955
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The slice of main_str (offset / length) is compared against the leading part of
   * str that has the same number of characters.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             [optional] The maximum number of characters of main_str that take part
   *                                    in the comparison.
   * @param boolean $case_insensitivity [optional] If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int The result of strcasecmp() (case-insensitive) or strcmp() for the two slices.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // substr() returns false when the offset lies behind the end of the string;
    // normalise that to '' so strlen() and the comparison helpers always get strings
    $mainPart = (string)self::substr($main_str, $offset, $length);
    $otherPart = (string)self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
4975
4976 1
  /**
   * Count the number of substring occurrences
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack <p>
   *                         The string to search in
   *                         </p>
   * @param string $needle   <p>
   *                         The substring to search for
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. If omitted, the search runs up to the end of the haystack.
   *                         </p>
   *
   * @return int|false The number of occurrences,<br />
   *                   <strong>false</strong> if haystack or needle is empty, or if offset + length
   *                   is not greater than zero.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // keep "no length given" as null so that substr() reads up to the end of the haystack;
      // casting it to 0 would always cut the haystack down to an empty string
      if ($length !== null) {
        $length = (int)$length;
      }

      if ((int)$length + $offset <= 0) {
        return false;
      }

      // substr() returns false for an offset behind the end of the string
      $haystack = (string)self::substr($haystack, $offset, $length);
    }

    return \mb_substr_count($haystack, $needle);
  }
5020
5021 12
  /**
   * Replace text within a portion of a string.
   *
   * If $str is an array, every element is processed on its own (scalar arguments are repeated for
   * each element, array arguments are truncated to the number of strings) and an array is returned.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|array   $str         The input string, or a list of input strings
   * @param string|array   $replacement The replacement; for a single $str only the first array element is used
   * @param int|array      $start       Character position where the replacement starts
   * @param null|int|array $length      [optional] Number of characters to replace; for a single $str
   *                                    null means "up to the end of the string"
   *
   * @return array|string The resulting string, or an array of results if $str is an array.
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // non-integer positions fall back to 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      // NOTE(review): a missing $length becomes 0 per element here (pure insertion), whereas the
      // single-string path below treats null as "replace up to the end" - confirm this is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, one invocation per input string
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // single string: only the first replacement of a list is used
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into characters so that positions are counted per character, not per byte
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      $length = \mb_strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first: the legacy "array first" argument order is no longer accepted by PHP 8
    return implode('', $smatches[0]);
  }
5096
5097
  /**
   * Returns a case swapped version of the string.
   *
   * The string is passed through clean() first; afterwards every non-whitespace
   * character that equals its upper-case form is lower-cased, every other one is upper-cased.
   *
   * @param string $str
   * @param string $encoding [optional] Anything other than the literal 'UTF-8' is run through
   *                         normalize_encoding() first.
   *
   * @return string each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $swapper = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by upper-casing => lower-case it, otherwise take the upper-case form
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapper, self::clean($str));
  }
5135 2
5136
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr [optional] Forwarded to to_ascii() as its second argument
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5150
5151
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see UTF8::to_latin1()
   *
   * @param $str Forwarded unchanged to to_latin1()
   *
   * @return string
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5164 8
5165 8
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str Forwarded unchanged to to_utf8()
   *
   * @return string
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5178
5179
  /**
   * Convert a UTF-8 string to ASCII by transliteration.
   *
   * The input is passed through self::clean() first. When the "intl" support flag is set (and PHP >= 5.4),
   * ICU transliteration is tried; every character that is still non-ASCII afterwards is looked up in the
   * mapping files below "data/" (one file per "bank", i.e. per code point >> 8).
   *
   * @param string $str     The input string.
   * @param string $unknown Replacement for characters without a known ASCII mapping. (default is ?)
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?')
  {
    // cache of the mapping banks loaded so far; the included bank files
    // write into this variable, so its name must not change
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
      $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

      // transliterator_transliterate() returns false on failure: keep the
      // cleaned input in that case and fall through to the table lookup
      if ($transliterated !== false) {
        $str = $transliterated;

        // check again, if we only have ASCII, now ...
        if (!preg_match("/[\x80-\xFF]/", $str)) {
          return $str;
        }
      }
    }

    // split into single UTF-8 characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset for every character, so an undecodable lead byte can never
      // reuse the code point of the previous character
      $ord = null;
      $len = strlen($c);

      // decode the code point; a sequence is only decoded if all of its
      // continuation bytes are really there
      if ($ordC0 >= 192 && $ordC0 <= 223 && $len >= 2) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239 && $len >= 3) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247 && $len >= 4) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251 && $len >= 5) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253 && $len >= 6) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation bytes (128-191), the bytes 254/255 and truncated
      // sequences cannot be decoded
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the bank file is expected to fill $UTF8_TO_ASCII[$bank] - confirm against data/*.php
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (isset($UTF8_TO_ASCII[$bank]) && array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so later writes to $c cannot touch $chars
    unset($c);

    return implode('', $chars);
  }

  /**
   * alias for "UTF8::to_win1252()"
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_iso8859($str)
  {
    return self::to_win1252($str);
  }

  /**
   * alias for "UTF8::to_win1252()"
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    return self::to_win1252($str);
  }

  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß   (bytes 0xC0-0xDF)
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿     (bytes 0x80-0xBF)
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-wise conversion, "\uXXXX" escape sequences and numeric HTML entities with
   * 2-4 digits ("&#252;") that occur in the text are decoded, too.
   *
   * @param string|array $str Any string or array; arrays are converted element by element (recursively).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // ASCII: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xc0) { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes a UTF-8 sequence with this lead byte needs
        if ($ordC1 <= 0xdf) {
          $need = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xef) {
          $need = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xf7) {
          $need = 3; // looks like 4 bytes UTF8
        } else {
          $need = 0; // doesn't look like UTF8, but should be converted
        }

        // all needed bytes must exist and be continuation bytes (0x80-0xbf)
        $isUtf8 = $need > 0 && $i + $need < $max;
        for ($j = 1; $isUtf8 && $j <= $need; $j++) {
          $ordCn = ord($str[$i + $j]);
          $isUtf8 = $ordCn >= 0x80 && $ordCn <= 0xbf;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $need + 1);
          $i += $need;
          continue;
        }

      } elseif (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // not valid UTF8 - encode the single byte as a 2 bytes UTF-8 sequence
      // (integer shift instead of "ord / 64": chr() must not get a fractional float)
      $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    // NOTE(review): the "HTML-ENTITIES" mbstring encoding is deprecated as of PHP 8.2
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }

  /**
   * Convert a string into "win1252"-encoding.
   *
   * Arrays are converted element by element (recursively, keys are kept); everything else is cast
   * to string and handed to self::utf8_decode().
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  private static function to_win1252($str)
  {
    if (is_array($str)) {

      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_win1252($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    return self::utf8_decode($str);
  }

  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted (or empty), all characters of the
   *                         Unicode categories "separator" (Z) and "other" (C) are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || !$chars) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }

  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string ('' for empty input)
   */
  public static function ucfirst($str)
  {
    $str = (string)$str;

    // nothing to do for an empty string
    if (!isset($str[0])) {
      return '';
    }

    // self::substr() may return false (e.g. for the rest of a one-char string),
    // so both parts are cast before they are used as strings
    $first = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtoupper($first) . $rest;
  }

  /**
   * alias for "UTF8::ucfirst()"
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word
   *
   * @return string
   */
  public static function ucword($word)
  {
    return self::ucfirst($word);
  }

  /**
   * Uppercase for all words in the string.
   *
   * Words are separated by a single space (" "). Words listed in $exceptions are left untouched,
   * but the first character of the whole result is always uppercased.
   *
   * @param  string $str
   * @param array   $exceptions Words (exact, case-sensitive match) that must not be changed.
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // only a really empty string is "nothing to do" ('0' is a valid word)
    if (!isset($str[0])) {
      return '';
    }

    // init
    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }
      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }

  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // url-decode once and turn "%uXXXX" escapes into hexadecimal HTML entities,
    // so they are decoded together with the other entities below
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    // ENT_HTML5 is only used when Bootup::is_php('5.4') reports support for it
    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // $str is a string here, so self::to_utf8() takes its string path
    $str = self::to_utf8($str);
    $str = self::html_entity_decode($str, $flags);
    $str = self::fix_simple_utf8(rawurldecode($str));

    return (string)$str;
  }

  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are "%20" ... "%FF" (upper-case hex, ascending), the values are the UTF-8 characters for
   * the corresponding Windows-1252 bytes. Bytes without a printable mapping in this table
   * (0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0, 0xAD) map to an empty string.
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = null;

    if ($array !== null) {
      return $array;
    }

    // bytes that are neither plain ASCII nor a 1:1 Latin-1 code point
    $special = array(
        0x7F => '',
        0x80 => "\xe2\x82\xac", // EURO SIGN
        0x81 => '',
        0x82 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
        0x83 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
        0x84 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
        0x85 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
        0x86 => "\xe2\x80\xa0", // DAGGER
        0x87 => "\xe2\x80\xa1", // DOUBLE DAGGER
        0x88 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
        0x89 => "\xe2\x80\xb0", // PER MILLE SIGN
        0x8A => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
        0x8B => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
        0x8C => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
        0x8D => '',
        0x8E => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
        0x8F => '',
        0x90 => '',
        0x91 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
        0x92 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
        0x93 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
        0x94 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
        0x95 => "\xe2\x80\xa2", // BULLET
        0x96 => "\xe2\x80\x93", // EN DASH
        0x97 => "\xe2\x80\x94", // EM DASH
        0x98 => "\xcb\x9c", // SMALL TILDE
        0x99 => "\xe2\x84\xa2", // TRADE MARK SIGN
        0x9A => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
        0x9B => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
        0x9C => "\xc5\x93", // LATIN SMALL LIGATURE OE
        0x9D => '',
        0x9E => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
        0x9F => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
        0xA0 => '',
        0xAD => '',
    );

    $array = array();
    for ($byte = 0x20; $byte <= 0xFF; $byte++) {
      $key = sprintf('%%%02X', $byte);

      if ($byte < 0x7F) {
        // printable ASCII maps to itself
        $array[$key] = chr($byte);
      } elseif (isset($special[$byte])) {
        $array[$key] = $special[$byte];
      } else {
        // 0xA1-0xFF: same code point as in ISO-8859-1 => 2 bytes UTF-8
        $array[$key] = chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
      }
    }

    return $array;
  }

  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is normalized with self::to_utf8() first; then the keys of self::$utf8ToWin1252 are
   * replaced by their values before the result is handed to Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once from self::$utf8ToWin1252
    static $utf8ToWin1252Keys = null;
    static $utf8ToWin1252Values = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    $str = self::to_utf8($str);

    if ($utf8ToWin1252Keys === null) {
      $utf8ToWin1252Keys = array_keys(self::$utf8ToWin1252);
      $utf8ToWin1252Values = array_values(self::$utf8ToWin1252);
    }

    return Xml::utf8_decode(str_replace($utf8ToWin1252Keys, $utf8ToWin1252Values, $str));
  }

  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, the keys of self::$cp1252ToUtf8 are replaced by their values
   * (only attempted if the result contains a "\xC2" byte at all).
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once from self::$cp1252ToUtf8
    static $cp1252ToUtf8Keys = null;
    static $cp1252ToUtf8Values = null;

    // NOTE(review): the native utf8_encode() is deprecated as of PHP 8.2
    $str = \utf8_encode($str);

    // without a "\xC2" byte none of the replacements is attempted
    if (false === strpos($str, "\xC2")) {
      return $str;
    }

    if ($cp1252ToUtf8Keys === null) {
      $cp1252ToUtf8Keys = array_keys(self::$cp1252ToUtf8);
      $cp1252ToUtf8Values = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($cp1252ToUtf8Keys, $cp1252ToUtf8Values, $str);
  }

  /**
   * fix -> utf8-win1252 chars
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    return self::fix_simple_utf8($str);
  }

  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    return self::$whitespaceTable;
  }

  /**
   * Limit the number of words in a string.
   *
   * @param  string $str
   * @param  int    $words    Maximum number of words to keep; values < 1 give an empty string.
   * @param  string $strAddOn Appended to the result, if the string was really shortened.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // leading whitespace + up to $words runs of "non-whitespace, whitespace"
    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    // The match is anchored at the start, so it is always a prefix of $str:
    // "same length" means "identical" and a plain comparison is enough.
    if (!isset($matches[0]) || $matches[0] === $str) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }

  /**
   * Wraps a string to a given number of characters
   *
   * The string is mapped to an ASCII "shadow" (one byte per character: " " for a space, "#" for an
   * existing break, "?" for everything else), the shadow is wrapped with the native wordwrap() and
   * the break positions are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      (See second example in the linked manual page).
   *                      </p>
   *
   * @return string the given string wrapped at the specified column.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars: the real characters (an existing break is one element)
    // $w:     the ASCII shadow of $chars, one byte per element
    $chars = array();
    $w = '';

    foreach (explode($break, $str) as $lineNo => $line) {
      if ($lineNo > 0) {
        $chars[] = $break;
        $w .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $w .= ' ' === $char ? ' ' : '?';
      }
    }

    $strReturn = '';
    $j = 0;
    $b = $i = -1;
    $w = wordwrap($w, $width, '#', $cut);

    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      // copy everything up to the break position
      for (++$i; $i < $b; ++$i) {
        $strReturn .= $chars[$j];
        unset($chars[$j++]);
      }

      // drop the character that was replaced by the break (old break or space);
      // for a break inserted by "$cut" there is nothing to drop
      if (isset($chars[$j]) && ($break === $chars[$j] || ' ' === $chars[$j])) {
        unset($chars[$j++]);
      }

      $strReturn .= $break;
    }

    return $strReturn . implode('', $chars);
  }

  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    return self::$whitespace;
  }

}
6067